From 645c9ca46d68465d018420b260e17ec19b7fcaae Mon Sep 17 00:00:00 2001
From: Yatharth Gupta
Date: Sat, 21 Oct 2023 15:32:20 +0000
Subject: [PATCH] Update tokenizer_2/tokenizer_config.json

---
 tokenizer_2/tokenizer_config.json | 53 ++++++++++++++-----------------
 1 file changed, 23 insertions(+), 30 deletions(-)

diff --git a/tokenizer_2/tokenizer_config.json b/tokenizer_2/tokenizer_config.json
index 98c925f..a8438e0 100644
--- a/tokenizer_2/tokenizer_config.json
+++ b/tokenizer_2/tokenizer_config.json
@@ -1,40 +1,33 @@
 {
   "add_prefix_space": false,
-  "added_tokens_decoder": {
-    "0": {
-      "content": "!",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "49406": {
-      "content": "<|startoftext|>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "49407": {
-      "content": "<|endoftext|>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    }
+  "bos_token": {
+    "__type": "AddedToken",
+    "content": "<|startoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
   },
-  "additional_special_tokens": [],
-  "bos_token": "<|startoftext|>",
   "clean_up_tokenization_spaces": true,
   "do_lower_case": true,
-  "eos_token": "<|endoftext|>",
+  "eos_token": {
+    "__type": "AddedToken",
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
   "errors": "replace",
   "model_max_length": 77,
   "pad_token": "!",
   "tokenizer_class": "CLIPTokenizer",
-  "tokenizer_file": null,
-  "unk_token": "<|endoftext|>"
+  "unk_token": {
+    "__type": "AddedToken",
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
 }