From 762ff80316130543abdc6e44f96b898bfab89b91 Mon Sep 17 00:00:00 2001
From: Alignment Lab AI
Date: Thu, 29 Jun 2023 23:54:28 +0000
Subject: [PATCH] Update OpenOrca.py

---
Note: line breaks and indentation in this patch were reconstructed from a
whitespace-collapsed copy. If the context lines fail to match, apply with
`git apply --ignore-whitespace`.

 OpenOrca.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/OpenOrca.py b/OpenOrca.py
index 3d4f215..ecdfcd6 100644
--- a/OpenOrca.py
+++ b/OpenOrca.py
@@ -15,14 +15,14 @@ class CustomDataset(DatasetBuilder):
         )

     def _split_generators(self, dl_manager):
-        base_path = 'path_to_your_data'
+        base_path = dl_manager.download_and_extract('https://huggingface.co/datasets/Open-Orca/OpenOrca')
         folders = ['001-cot', '002-flan', '003-flan-1m', '004-flan1m-aug-shuf', '005-flan-5m', '006-flan-chatgpt', '007-gpt4_100k', '008-niv', '009-t0'] # add more as needed

         split_generators = []
         for folder in folders:
             split_generators.extend([
-                SplitGenerator(name=f'{folder.replace("-", "_")}_train', gen_kwargs={"filepath": f'{folder}/train.jsonl'}),
-                SplitGenerator(name=f'{folder.replace("-", "_")}_test', gen_kwargs={"filepath": f'{folder}/test.jsonl'}),
+                SplitGenerator(name=f'{folder.replace("-", "_")}_train', gen_kwargs={"filepath": os.path.join(base_path, f'{folder}/train.jsonl')}),
+                SplitGenerator(name=f'{folder.replace("-", "_")}_test', gen_kwargs={"filepath": os.path.join(base_path, f'{folder}/test.jsonl')}),
             ])

         return split_generators
@@ -36,3 +36,4 @@ class CustomDataset(DatasetBuilder):
                     'question': data['question'],
                     'response': data['response']
                 }
+