nklockiewicz commited on
Commit
c2294c2
·
verified ·
1 Parent(s): 65dfc9f

Upload 2 files

Browse files
Files changed (2) hide show
  1. tokenizer_config_350.json +23 -0
  2. tokenizer_lfm350.json +0 -0
tokenizer_config_350.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": null,
3
+ "backend": "tokenizers",
4
+ "bos_token": "<|startoftext|>",
5
+ "clean_up_tokenization_spaces": false,
6
+ "eos_token": "<|im_end|>",
7
+ "extra_special_tokens": [],
8
+ "is_local": true,
9
+ "legacy": false,
10
+ "model_input_names": [
11
+ "input_ids",
12
+ "attention_mask"
13
+ ],
14
+ "model_max_length": 1000000000000000019884624838656,
15
+ "model_specific_special_tokens": {},
16
+ "pad_token": "<|pad|>",
17
+ "sp_model_kwargs": {},
18
+ "spaces_between_special_tokens": false,
19
+ "tokenizer_class": "TokenizersBackend",
20
+ "use_default_system_prompt": false,
21
+ "use_fast": true,
22
+ "chat_template": "{{- bos_token -}}\n{%- set keep_past_thinking = keep_past_thinking | default(false) -%}\n{%- set ns = namespace(system_prompt=\"\") -%}\n{%- if messages[0][\"role\"] == \"system\" -%}\n {%- set sys_content = messages[0][\"content\"] -%}\n {%- if sys_content is not string -%}\n {%- for item in sys_content -%}\n {%- if item[\"type\"] == \"text\" -%}\n {%- set ns.system_prompt = ns.system_prompt + item[\"text\"] -%}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {%- set ns.system_prompt = sys_content -%}\n {%- endif -%}\n {%- set messages = messages[1:] -%}\n{%- endif -%}\n{%- if tools -%}\n {%- set ns.system_prompt = ns.system_prompt + (\"\\n\" if ns.system_prompt else \"\") + \"List of tools: [\" -%}\n {%- for tool in tools -%}\n {%- if tool is not string -%}\n {%- set tool = tool | tojson -%}\n {%- endif -%}\n {%- set ns.system_prompt = ns.system_prompt + tool -%}\n {%- if not loop.last -%}\n {%- set ns.system_prompt = ns.system_prompt + \", \" -%}\n {%- endif -%}\n {%- endfor -%}\n {%- set ns.system_prompt = ns.system_prompt + \"]\" -%}\n{%- endif -%}\n{%- if ns.system_prompt -%}\n {{- \"<|im_start|>system\\n\" + ns.system_prompt + \"<|im_end|>\\n\" -}}\n{%- endif -%}\n{%- set ns.last_assistant_index = -1 -%}\n{%- for message in messages -%}\n {%- if message[\"role\"] == \"assistant\" -%}\n {%- set ns.last_assistant_index = loop.index0 -%}\n {%- endif -%}\n{%- endfor -%}\n{%- for message in messages -%}\n {{- \"<|im_start|>\" + message[\"role\"] + \"\\n\" -}}\n {%- set content = message[\"content\"] -%}\n {%- if content is not string -%}\n {%- set ns.content = \"\" -%}\n {%- for item in content -%}\n {%- if item[\"type\"] == \"image\" -%}\n {%- set ns.content = ns.content + \"<image>\" -%}\n {%- elif item[\"type\"] == \"text\" -%}\n {%- set ns.content = ns.content + item[\"text\"] -%}\n {%- else -%}\n {%- set ns.content = ns.content + item | tojson -%}\n {%- endif -%}\n {%- endfor -%}\n {%- set content = ns.content -%}\n {%- endif -%}\n {%- if message[\"role\"] == \"assistant\" and not keep_past_thinking and loop.index0 != ns.last_assistant_index -%}\n {%- if \"</think>\" in content -%}\n {%- set content = content.split(\"</think>\")[-1] | trim -%}\n {%- endif -%}\n {%- endif -%}\n {{- content + \"<|im_end|>\\n\" -}}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{- \"<|im_start|>assistant\\n\" -}}\n{%- endif -%}"
23
+ }
tokenizer_lfm350.json ADDED
The diff for this file is too large to render. See raw diff