Upload 2 files
Browse files- tokenizer_config_350.json +23 -0
- tokenizer_lfm350.json +0 -0
tokenizer_config_350.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": null,
|
| 3 |
+
"backend": "tokenizers",
|
| 4 |
+
"bos_token": "<|startoftext|>",
|
| 5 |
+
"clean_up_tokenization_spaces": false,
|
| 6 |
+
"eos_token": "<|im_end|>",
|
| 7 |
+
"extra_special_tokens": [],
|
| 8 |
+
"is_local": true,
|
| 9 |
+
"legacy": false,
|
| 10 |
+
"model_input_names": [
|
| 11 |
+
"input_ids",
|
| 12 |
+
"attention_mask"
|
| 13 |
+
],
|
| 14 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 15 |
+
"model_specific_special_tokens": {},
|
| 16 |
+
"pad_token": "<|pad|>",
|
| 17 |
+
"sp_model_kwargs": {},
|
| 18 |
+
"spaces_between_special_tokens": false,
|
| 19 |
+
"tokenizer_class": "TokenizersBackend",
|
| 20 |
+
"use_default_system_prompt": false,
|
| 21 |
+
"use_fast": true,
|
| 22 |
+
"chat_template": "{{- bos_token -}}\n{%- set keep_past_thinking = keep_past_thinking | default(false) -%}\n{%- set ns = namespace(system_prompt=\"\") -%}\n{%- if messages[0][\"role\"] == \"system\" -%}\n {%- set sys_content = messages[0][\"content\"] -%}\n {%- if sys_content is not string -%}\n {%- for item in sys_content -%}\n {%- if item[\"type\"] == \"text\" -%}\n {%- set ns.system_prompt = ns.system_prompt + item[\"text\"] -%}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {%- set ns.system_prompt = sys_content -%}\n {%- endif -%}\n {%- set messages = messages[1:] -%}\n{%- endif -%}\n{%- if tools -%}\n {%- set ns.system_prompt = ns.system_prompt + (\"\\n\" if ns.system_prompt else \"\") + \"List of tools: [\" -%}\n {%- for tool in tools -%}\n {%- if tool is not string -%}\n {%- set tool = tool | tojson -%}\n {%- endif -%}\n {%- set ns.system_prompt = ns.system_prompt + tool -%}\n {%- if not loop.last -%}\n {%- set ns.system_prompt = ns.system_prompt + \", \" -%}\n {%- endif -%}\n {%- endfor -%}\n {%- set ns.system_prompt = ns.system_prompt + \"]\" -%}\n{%- endif -%}\n{%- if ns.system_prompt -%}\n {{- \"<|im_start|>system\\n\" + ns.system_prompt + \"<|im_end|>\\n\" -}}\n{%- endif -%}\n{%- set ns.last_assistant_index = -1 -%}\n{%- for message in messages -%}\n {%- if message[\"role\"] == \"assistant\" -%}\n {%- set ns.last_assistant_index = loop.index0 -%}\n {%- endif -%}\n{%- endfor -%}\n{%- for message in messages -%}\n {{- \"<|im_start|>\" + message[\"role\"] + \"\\n\" -}}\n {%- set content = message[\"content\"] -%}\n {%- if content is not string -%}\n {%- set ns.content = \"\" -%}\n {%- for item in content -%}\n {%- if item[\"type\"] == \"image\" -%}\n {%- set ns.content = ns.content + \"<image>\" -%}\n {%- elif item[\"type\"] == \"text\" -%}\n {%- set ns.content = ns.content + item[\"text\"] -%}\n {%- else -%}\n {%- set ns.content = ns.content + item | tojson -%}\n {%- endif -%}\n {%- endfor -%}\n {%- set content = ns.content -%}\n {%- endif -%}\n {%- if message[\"role\"] == \"assistant\" and not keep_past_thinking and loop.index0 != ns.last_assistant_index -%}\n {%- if \"</think>\" in content -%}\n {%- set content = content.split(\"</think>\")[-1] | trim -%}\n {%- endif -%}\n {%- endif -%}\n {{- content + \"<|im_end|>\\n\" -}}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{- \"<|im_start|>assistant\\n\" -}}\n{%- endif -%}"
|
| 23 |
+
}
|
tokenizer_lfm350.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|