144 lines
5.0 KiB
Python
144 lines
5.0 KiB
Python
|
import evaluate
|
||
|
from evaluate.evaluation_suite import SubTask
|
||
|
|
||
|
class MyEvaluationSuite(evaluate.EvaluationSuite):
    """Evaluation suite with one text-classification sub-task per dataset subset.

    Every sub-task targets the ``"OpenOrca"`` dataset and shares the same
    evaluator arguments; only the ``subset`` differs between them.

    NOTE(review): the ``"your-split"``, ``"your-metric"``,
    ``"your-input-column"``, ``"your-label-column"`` and ``"your-label-*"``
    values look like unfilled template placeholders -- confirm and replace
    them with real values before running the suite.
    """

    def __init__(self, name):
        """Build the list of sub-tasks and store it on ``self.suite``.

        Args:
            name: Suite name, forwarded unchanged to the parent constructor.
        """
        super().__init__(name)

        # Subsets to evaluate; the sub-tasks in ``self.suite`` keep this order.
        subsets = (
            "001-cot",
            "002-flan",
            "003-flan-1m",
            "004-flan1m-aug-shuf",
            "005-flan-5m",
            "006-flan-chatgpt",
            "007-gpt4_100k",
            "008-niv",
            "009-t0",
        )

        self.suite = [
            SubTask(
                task_type="text-classification",
                data="OpenOrca",
                subset=subset,
                split="your-split",
                # The dict literals are evaluated once per iteration, so every
                # sub-task owns its own ``args_for_task`` / ``label_mapping``
                # objects and no mutable state is shared between sub-tasks.
                args_for_task={
                    "metric": "your-metric",
                    "input_column": "your-input-column",
                    "label_column": "your-label-column",
                    # Integer class ids for every subset (the first subset
                    # previously used 0.0 / 1.0 while all others used 0 / 1).
                    "label_mapping": {
                        "your-label-0": 0,
                        "your-label-1": 1,
                    },
                },
            )
            for subset in subsets
        ]
|