Update OpenOrca.py
This commit is contained in:
parent
4e4ee209e8
commit
5badb049ef
@ -17,11 +17,11 @@ class CustomDataset(DatasetBuilder):
|
|||||||
def _split_generators(self, dl_manager):
|
def _split_generators(self, dl_manager):
|
||||||
base_path = dl_manager.download_and_extract('https://huggingface.co/datasets/Open-Orca/OpenOrca')
|
base_path = dl_manager.download_and_extract('https://huggingface.co/datasets/Open-Orca/OpenOrca')
|
||||||
folders = ['001-cot', '002-flan', '003-flan-1m', '004-flan1m-aug-shuf', '005-flan-5m',
|
folders = ['001-cot', '002-flan', '003-flan-1m', '004-flan1m-aug-shuf', '005-flan-5m',
|
||||||
'006-flan-chatgpt', '007-gpt4_100k', '008-niv', '009-t0'] # add more as needed
|
'006-flan-chatgpt', '007-gpt4_100k', '008-niv', '009-t0']
|
||||||
split_generators = []
|
split_generators = []
|
||||||
for folder in folders:
|
for folder in folders:
|
||||||
train_file_path = os.path.join(base_path, f'{folder}/cot-train.jsonl')
|
train_file_path = os.path.join(base_path, f'{folder}/{folder}-train.jsonl')
|
||||||
test_file_path = os.path.join(base_path, f'{folder}/cot-test.jsonl')
|
test_file_path = os.path.join(base_path, f'{folder}/{folder}-test.jsonl')
|
||||||
if os.path.isfile(train_file_path) and os.path.isfile(test_file_path):
|
if os.path.isfile(train_file_path) and os.path.isfile(test_file_path):
|
||||||
split_generators.extend([
|
split_generators.extend([
|
||||||
SplitGenerator(name=f'{folder.replace("-", "_")}_train', gen_kwargs={"filepath": train_file_path}),
|
SplitGenerator(name=f'{folder.replace("-", "_")}_train', gen_kwargs={"filepath": train_file_path}),
|
||||||
|
Loading…
x
Reference in New Issue
Block a user