Update tokenizer_2/tokenizer_config.json

2023-10-21 15:32:20 +00:00 · 2023-10-21 15:32:20 +00:00 · 645c9ca46d
commit 645c9ca46d
parent e762ad4cc9
1 changed file with 23 additions and 30 deletions
--- a/tokenizer_2/tokenizer_config.json
+++ b/tokenizer_2/tokenizer_config.json
@@ -1,40 +1,33 @@
 {
   "add_prefix_space": false,
-  "added_tokens_decoder": {
+  "bos_token": {
-    "0": {
+    "__type": "AddedToken",
-      "content": "!",
+    "content": "<|startoftext|>",
-      "lstrip": false,
+    "lstrip": false,
-      "normalized": false,
+    "normalized": true,
-      "rstrip": false,
+    "rstrip": false,
-      "single_word": false,
+    "single_word": false
-      "special": true
-    },
-    "49406": {
-      "content": "<|startoftext|>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "49407": {
-      "content": "<|endoftext|>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    }
   },
-  "additional_special_tokens": [],
-  "bos_token": "<|startoftext|>",
   "clean_up_tokenization_spaces": true,
   "do_lower_case": true,
-  "eos_token": "<|endoftext|>",
+  "eos_token": {
+    "__type": "AddedToken",
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
   "errors": "replace",
   "model_max_length": 77,
   "pad_token": "!",
   "tokenizer_class": "CLIPTokenizer",
-  "tokenizer_file": null,
+  "unk_token": {
-  "unk_token": "<|endoftext|>"
+    "__type": "AddedToken",
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
 }