Update OpenOrca.py

This commit is contained in:
Alignment Lab AI 2023-06-30 00:07:17 +00:00 committed by huggingface-web
parent 4e4ee209e8
commit 5badb049ef

@@ -17,11 +17,11 @@ class CustomDataset(DatasetBuilder):
def _split_generators(self, dl_manager):
base_path = dl_manager.download_and_extract('https://huggingface.co/datasets/Open-Orca/OpenOrca')
folders = ['001-cot', '002-flan', '003-flan-1m', '004-flan1m-aug-shuf', '005-flan-5m',
-'006-flan-chatgpt', '007-gpt4_100k', '008-niv', '009-t0'] # add more as needed
+'006-flan-chatgpt', '007-gpt4_100k', '008-niv', '009-t0']
split_generators = []
for folder in folders:
-train_file_path = os.path.join(base_path, f'{folder}/cot-train.jsonl')
-test_file_path = os.path.join(base_path, f'{folder}/cot-test.jsonl')
+train_file_path = os.path.join(base_path, f'{folder}/{folder}-train.jsonl')
+test_file_path = os.path.join(base_path, f'{folder}/{follder}-test.jsonl')
if os.path.isfile(train_file_path) and os.path.isfile(test_file_path):
split_generators.extend([
SplitGenerator(name=f'{folder.replace("-", "_")}_train', gen_kwargs={"filepath": train_file_path}),