Update OpenOrca.py
This commit is contained in:
parent
4e4ee209e8
commit
5badb049ef
@ -17,11 +17,11 @@ class CustomDataset(DatasetBuilder):
|
||||
def _split_generators(self, dl_manager):
|
||||
base_path = dl_manager.download_and_extract('https://huggingface.co/datasets/Open-Orca/OpenOrca')
|
||||
folders = ['001-cot', '002-flan', '003-flan-1m', '004-flan1m-aug-shuf', '005-flan-5m',
|
||||
'006-flan-chatgpt', '007-gpt4_100k', '008-niv', '009-t0'] # add more as needed
|
||||
'006-flan-chatgpt', '007-gpt4_100k', '008-niv', '009-t0']
|
||||
split_generators = []
|
||||
for folder in folders:
|
||||
train_file_path = os.path.join(base_path, f'{folder}/cot-train.jsonl')
|
||||
test_file_path = os.path.join(base_path, f'{folder}/cot-test.jsonl')
|
||||
train_file_path = os.path.join(base_path, f'{folder}/{folder}-train.jsonl')
|
||||
test_file_path = os.path.join(base_path, f'{folder}/{folder}-test.jsonl')
|
||||
if os.path.isfile(train_file_path) and os.path.isfile(test_file_path):
|
||||
split_generators.extend([
|
||||
SplitGenerator(name=f'{folder.replace("-", "_")}_train', gen_kwargs={"filepath": train_file_path}),
|
||||
|
Loading…
x
Reference in New Issue
Block a user