Update OpenOrca.py
This commit is contained in:
parent e30de90a53
commit 762ff80316
@@ -15,14 +15,14 @@ class CustomDataset(DatasetBuilder):
|
|||||||
)
|
)
|
||||||
|
|
||||||
def _split_generators(self, dl_manager):
|
def _split_generators(self, dl_manager):
|
||||||
base_path = 'path_to_your_data'
|
base_path = dl_manager.download_and_extract('https://huggingface.co/datasets/Open-Orca/OpenOrca')
|
||||||
folders = ['001-cot', '002-flan', '003-flan-1m', '004-flan1m-aug-shuf', '005-flan-5m',
|
folders = ['001-cot', '002-flan', '003-flan-1m', '004-flan1m-aug-shuf', '005-flan-5m',
|
||||||
'006-flan-chatgpt', '007-gpt4_100k', '008-niv', '009-t0'] # add more as needed
|
'006-flan-chatgpt', '007-gpt4_100k', '008-niv', '009-t0'] # add more as needed
|
||||||
split_generators = []
|
split_generators = []
|
||||||
for folder in folders:
|
for folder in folders:
|
||||||
split_generators.extend([
|
split_generators.extend([
|
||||||
SplitGenerator(name=f'{folder.replace("-", "_")}_train', gen_kwargs={"filepath": f'{folder}/train.jsonl'}),
|
SplitGenerator(name=f'{folder.replace("-", "_")}_train', gen_kwargs={"filepath": os.path.join(base_path, f'{folder}/train.jsonl')}),
|
||||||
SplitGenerator(name=f'{folder.replace("-", "_")}_test', gen_kwargs={"filepath": f'{folder}/test.jsonl'}),
|
SplitGenerator(name=f'{folder.replace("-", "_")}_test', gen_kwargs={"filepath": os.path.join(base_path, f'{folder}/test.jsonl')}),
|
||||||
])
|
])
|
||||||
return split_generators
|
return split_generators
|
||||||
|
|
||||||
@@ -36,3 +36,4 @@ class CustomDataset(DatasetBuilder):
|
|||||||
'question': data['question'],
|
'question': data['question'],
|
||||||
'response': data['response']
|
'response': data['response']
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user