Update OpenOrca.py
This commit is contained in:
parent
762ff80316
commit
4e4ee209e8
20
OpenOrca.py
20
OpenOrca.py
@ -20,20 +20,22 @@ class CustomDataset(DatasetBuilder):
|
||||
'006-flan-chatgpt', '007-gpt4_100k', '008-niv', '009-t0'] # add more as needed
|
||||
split_generators = []
|
||||
for folder in folders:
|
||||
train_file_path = os.path.join(base_path, f'{folder}/cot-train.jsonl')
|
||||
test_file_path = os.path.join(base_path, f'{folder}/cot-test.jsonl')
|
||||
if os.path.isfile(train_file_path) and os.path.isfile(test_file_path):
|
||||
split_generators.extend([
|
||||
SplitGenerator(name=f'{folder.replace("-", "_")}_train', gen_kwargs={"filepath": os.path.join(base_path, f'{folder}/train.jsonl')}),
|
||||
SplitGenerator(name=f'{folder.replace("-", "_")}_test', gen_kwargs={"filepath": os.path.join(base_path, f'{folder}/test.jsonl')}),
|
||||
SplitGenerator(name=f'{folder.replace("-", "_")}_train', gen_kwargs={"filepath": train_file_path}),
|
||||
SplitGenerator(name=f'{folder.replace("-", "_")}_test', gen_kwargs={"filepath": test_file_path}),
|
||||
])
|
||||
return split_generators
|
||||
|
||||
def _generate_examples(self, filepath):
|
||||
with open(filepath, 'r') as f:
|
||||
with open(filepath, 'r', encoding='utf-8') as f:
|
||||
for id_, line in enumerate(f):
|
||||
data = json.loads(line)
|
||||
data = json.loads(line.strip())
|
||||
yield id_, {
|
||||
'id': data['id'],
|
||||
'system_prompt': data['system_prompt'],
|
||||
'question': data['question'],
|
||||
'response': data['response']
|
||||
'id': data.get('id', ''),
|
||||
'system_prompt': data.get('system_prompt', ''),
|
||||
'question': data.get('question', ''),
|
||||
'response': data.get('response', '')
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user