Update OpenOrca.py
This commit is contained in:
parent
e30de90a53
commit
762ff80316
@ -15,14 +15,14 @@ class CustomDataset(DatasetBuilder):
|
||||
)
|
||||
|
||||
def _split_generators(self, dl_manager):
    """Build one train and one test split generator per data folder.

    The dataset root is obtained from ``dl_manager.download_and_extract``
    and every split's ``filepath`` is anchored at that root, so the
    generators do not depend on the current working directory.

    Args:
        dl_manager: Download manager supplied by the builder framework;
            only its ``download_and_extract`` method is used here.

    Returns:
        A list of ``SplitGenerator`` objects, two per folder, named
        ``<folder>_train`` and ``<folder>_test`` with dashes in the
        folder name replaced by underscores.
    """
    # Local import: the file's import block is not visible from this
    # block, so make sure ``os`` is in scope for the path joins below.
    import os

    # NOTE(review): this URL is the dataset's repository page; confirm
    # that download_and_extract resolves it to a directory that actually
    # contains the folders listed below.
    base_path = dl_manager.download_and_extract('https://huggingface.co/datasets/Open-Orca/OpenOrca')
    folders = ['001-cot', '002-flan', '003-flan-1m', '004-flan1m-aug-shuf', '005-flan-5m',
               '006-flan-chatgpt', '007-gpt4_100k', '008-niv', '009-t0']  # add more as needed

    split_generators = []
    for folder in folders:
        # Split names use underscores where the folder name has dashes.
        split_prefix = folder.replace("-", "_")
        for split in ("train", "test"):
            split_generators.append(
                SplitGenerator(
                    name=f'{split_prefix}_{split}',
                    gen_kwargs={"filepath": os.path.join(base_path, f'{folder}/{split}.jsonl')},
                )
            )
    return split_generators
|
||||
|
||||
@ -36,3 +36,4 @@ class CustomDataset(DatasetBuilder):
|
||||
'question': data['question'],
|
||||
'response': data['response']
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user