John6666 committed
Commit a108184 • 1 Parent(s): fccbd1a

Upload 2 files

Files changed (2)
  1. formatter.py +43 -0
  2. llmdolphin.py +4 -1
formatter.py ADDED
@@ -0,0 +1,43 @@
+from llama_cpp_agent.messages_formatter import MessagesFormatter, PromptMarkers, Roles
+
+mistral_v1_markers = {
+    Roles.system: PromptMarkers(""" [INST]""", """ [/INST] Understood.</s>"""),
+    Roles.user: PromptMarkers(""" [INST]""", """ [/INST]"""),
+    Roles.assistant: PromptMarkers(" ", "</s>"),
+    Roles.tool: PromptMarkers("", ""),
+}
+
+mistral_v1_formatter = MessagesFormatter(
+    pre_prompt="",
+    prompt_markers=mistral_v1_markers,
+    include_sys_prompt_in_first_user_message=False,
+    default_stop_sequences=["</s>"]
+)
+
+mistral_v2_markers = {
+    Roles.system: PromptMarkers("""[INST] """, """[/INST] Understood.</s>"""),
+    Roles.user: PromptMarkers("""[INST] """, """[/INST]"""),
+    Roles.assistant: PromptMarkers(" ", "</s>"),
+    Roles.tool: PromptMarkers("", ""),
+}
+
+mistral_v2_formatter = MessagesFormatter(
+    pre_prompt="",
+    prompt_markers=mistral_v2_markers,
+    include_sys_prompt_in_first_user_message=False,
+    default_stop_sequences=["</s>"]
+)
+
+mistral_v3_tekken_markers = {
+    Roles.system: PromptMarkers("""[INST]""", """[/INST]Understood.</s>"""),
+    Roles.user: PromptMarkers("""[INST]""", """[/INST]"""),
+    Roles.assistant: PromptMarkers("", "</s>"),
+    Roles.tool: PromptMarkers("", ""),
+}
+
+mistral_v3_tekken_formatter = MessagesFormatter(
+    pre_prompt="",
+    prompt_markers=mistral_v3_tekken_markers,
+    include_sys_prompt_in_first_user_message=False,
+    default_stop_sequences=["</s>"]
+)
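
The three marker sets above differ only in where whitespace sits around the [INST] / [/INST] tags (a space before the tag in V1, after it in V2, none in V3 Tekken). A minimal check script, hypothetical and not part of this commit, prints each user-role marker using only the dictionaries defined in formatter.py (it assumes formatter.py is on the import path and that PromptMarkers has a readable dataclass repr):

# Hypothetical check script (not in this commit): show how each Mistral
# marker set wraps a user turn, using only the dictionaries from formatter.py.
from llama_cpp_agent.messages_formatter import Roles
from formatter import (
    mistral_v1_markers,
    mistral_v2_markers,
    mistral_v3_tekken_markers,
)

for name, markers in [
    ("V1", mistral_v1_markers),
    ("V2", mistral_v2_markers),
    ("V3 - Tekken", mistral_v3_tekken_markers),
]:
    # Relies on PromptMarkers showing its start/end strings in its repr.
    print(f"{name}: user turn wrapped as {markers[Roles.user]}")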
llmdolphin.py CHANGED
@@ -8,7 +8,7 @@ from llama_cpp_agent.chat_history.messages import Roles
 from ja_to_danbooru.ja_to_danbooru import jatags_to_danbooru_tags
 import wrapt_timeout_decorator
 from pathlib import Path
-
+from formatter import mistral_v1_formatter, mistral_v2_formatter, mistral_v3_tekken_formatter
 
 llm_models_dir = "./llm_models"
 llm_models = {
@@ -821,6 +821,9 @@ llm_formats = {
     "DeepSeek Coder v2": MessagesFormatterType.DEEP_SEEK_CODER_2,
     "Gemma 2": MessagesFormatterType.ALPACA,
     "Qwen2": MessagesFormatterType.OPEN_CHAT,
+    "Mistral Tokenizer V1": mistral_v1_formatter,
+    "Mistral Tokenizer V2": mistral_v2_formatter,
+    "Mistral Tokenizer V3 - Tekken": mistral_v3_tekken_formatter,
 }
 # https://github.com/Maximilian-Winter/llama-cpp-agent
 llm_languages = ["English", "Japanese", "Chinese", "Korean", "Spanish", "Portuguese", "German", "French", "Finnish", "Russian"]
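
With this commit llm_formats holds a mix of MessagesFormatterType enum members and the MessagesFormatter instances imported from formatter.py, so whatever builds the agent has to branch on the entry type. A minimal sketch of that branching, with the keyword-argument names assumed rather than taken from this repository:

# Hypothetical helper (not in this commit): map an llm_formats entry to agent
# keyword arguments. The kwarg names below are an assumption about the
# llama-cpp-agent constructor, not confirmed by this repository.
from llama_cpp_agent.messages_formatter import MessagesFormatter

def formatter_kwargs(llm_formats: dict, format_name: str) -> dict:
    entry = llm_formats[format_name]
    if isinstance(entry, MessagesFormatter):
        # Custom formatter object, e.g. "Mistral Tokenizer V1".
        return {"custom_messages_formatter": entry}
    # Predefined MessagesFormatterType member, e.g. the entry for "Qwen2".
    return {"predefined_messages_formatter_type": entry}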