Update OpenOrca.py
This commit is contained in:
parent
762ff80316
commit
4e4ee209e8
20
OpenOrca.py
20
OpenOrca.py
@@ -20,20 +20,22 @@ class CustomDataset(DatasetBuilder):
|
|||||||
'006-flan-chatgpt', '007-gpt4_100k', '008-niv', '009-t0'] # add more as needed
|
'006-flan-chatgpt', '007-gpt4_100k', '008-niv', '009-t0'] # add more as needed
|
||||||
split_generators = []
|
split_generators = []
|
||||||
for folder in folders:
|
for folder in folders:
|
||||||
|
train_file_path = os.path.join(base_path, f'{folder}/cot-train.jsonl')
|
||||||
|
test_file_path = os.path.join(base_path, f'{folder}/cot-test.jsonl')
|
||||||
|
if os.path.isfile(train_file_path) and os.path.isfile(test_file_path):
|
||||||
split_generators.extend([
|
split_generators.extend([
|
||||||
SplitGenerator(name=f'{folder.replace("-", "_")}_train', gen_kwargs={"filepath": os.path.join(base_path, f'{folder}/train.jsonl')}),
|
SplitGenerator(name=f'{folder.replace("-", "_")}_train', gen_kwargs={"filepath": train_file_path}),
|
||||||
SplitGenerator(name=f'{folder.replace("-", "_")}_test', gen_kwargs={"filepath": os.path.join(base_path, f'{folder}/test.jsonl')}),
|
SplitGenerator(name=f'{folder.replace("-", "_")}_test', gen_kwargs={"filepath": test_file_path}),
|
||||||
])
|
])
|
||||||
return split_generators
|
return split_generators
|
||||||
|
|
||||||
def _generate_examples(self, filepath):
|
def _generate_examples(self, filepath):
|
||||||
with open(filepath, 'r') as f:
|
with open(filepath, 'r', encoding='utf-8') as f:
|
||||||
for id_, line in enumerate(f):
|
for id_, line in enumerate(f):
|
||||||
data = json.loads(line)
|
data = json.loads(line.strip())
|
||||||
yield id_, {
|
yield id_, {
|
||||||
'id': data['id'],
|
'id': data.get('id', ''),
|
||||||
'system_prompt': data['system_prompt'],
|
'system_prompt': data.get('system_prompt', ''),
|
||||||
'question': data['question'],
|
'question': data.get('question', ''),
|
||||||
'response': data['response']
|
'response': data.get('response', '')
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user