John6666 committed on
Commit
8b1b5ef
•
1 Parent(s): 33c69b3

Upload llmdolphin.py

Browse files
Files changed (1) hide show
  1. llmdolphin.py +16 -4
llmdolphin.py CHANGED
@@ -28,11 +28,21 @@ llm_models = {
28
  "Nemo-12B-Marlin-v7.Q4_K_M.gguf": ["mradermacher/Nemo-12B-Marlin-v7-GGUF", MessagesFormatterType.MISTRAL],
29
  "NemoDori-v0.2-Upscaled.1-14B.Q4_K_M.gguf": ["mradermacher/NemoDori-v0.2-Upscaled.1-14B-GGUF", MessagesFormatterType.MISTRAL],
30
  "Fireball-Mistral-Nemo-Base-2407-sft-v2.2a.Q4_K_M.gguf": ["mradermacher/Fireball-Mistral-Nemo-Base-2407-sft-v2.2a-GGUF", MessagesFormatterType.MISTRAL],
 
31
  "Trinas_Nectar-8B-model_stock.i1-Q4_K_M.gguf": ["mradermacher/Trinas_Nectar-8B-model_stock-i1-GGUF", MessagesFormatterType.MISTRAL],
 
 
 
 
 
 
 
32
  "storiecreative-q5_k_m.gguf": ["ClaudioItaly/StorieCreative-Q5_K_M-GGUF", MessagesFormatterType.MISTRAL],
33
  "L3.1-gramamax.Q5_K_M.gguf": ["mradermacher/L3.1-gramamax-GGUF", MessagesFormatterType.MISTRAL],
 
34
  "sellen-8b-model_stock-q4_k_m.gguf": ["DreadPoor/Sellen-8B-model_stock-Q4_K_M-GGUF", MessagesFormatterType.MISTRAL],
35
  "nokstella_coder-8b-model_stock-q4_k_m.gguf": ["DreadPoor/Nokstella_coder-8B-model_stock-Q4_K_M-GGUF", MessagesFormatterType.LLAMA_3],
 
36
  "L3.1-Sithamo-v0.4-8B.q5_k_m.gguf": ["kromquant/L3.1-Siithamo-v0.4-8B-GGUFs", MessagesFormatterType.MISTRAL],
37
  "Berry-Spark-7B-Fix.Q5_K_M.gguf": ["mradermacher/Berry-Spark-7B-Fix-GGUF", MessagesFormatterType.OPEN_CHAT],
38
  "llama3.1-gutenberg-8B.Q4_K_S.gguf": ["mradermacher/llama3.1-gutenberg-8B-GGUF", MessagesFormatterType.LLAMA_3],
@@ -682,9 +692,10 @@ def dolphin_respond(
682
  llm = Llama(
683
  model_path=str(Path(f"{llm_models_dir}/{model}")),
684
  flash_attn=True,
685
- n_gpu_layers=81,
686
  n_batch=1024,
687
- n_ctx=8192,
 
688
  )
689
  provider = LlamaCppPythonProvider(llm)
690
 
@@ -776,9 +787,10 @@ def dolphin_respond_auto(
776
  llm = Llama(
777
  model_path=str(Path(f"{llm_models_dir}/{model}")),
778
  flash_attn=True,
779
- n_gpu_layers=81,
780
  n_batch=1024,
781
- n_ctx=8192,
 
782
  )
783
  provider = LlamaCppPythonProvider(llm)
784
 
 
28
  "Nemo-12B-Marlin-v7.Q4_K_M.gguf": ["mradermacher/Nemo-12B-Marlin-v7-GGUF", MessagesFormatterType.MISTRAL],
29
  "NemoDori-v0.2-Upscaled.1-14B.Q4_K_M.gguf": ["mradermacher/NemoDori-v0.2-Upscaled.1-14B-GGUF", MessagesFormatterType.MISTRAL],
30
  "Fireball-Mistral-Nemo-Base-2407-sft-v2.2a.Q4_K_M.gguf": ["mradermacher/Fireball-Mistral-Nemo-Base-2407-sft-v2.2a-GGUF", MessagesFormatterType.MISTRAL],
31
+ "Deutscher-Pantheon-12B.Q4_K_M.gguf": ["mradermacher/Deutscher-Pantheon-12B-GGUF", MessagesFormatterType.MISTRAL],
32
  "Trinas_Nectar-8B-model_stock.i1-Q4_K_M.gguf": ["mradermacher/Trinas_Nectar-8B-model_stock-i1-GGUF", MessagesFormatterType.MISTRAL],
33
+ "NemoMix-Unleashed-12B-Q4_K_M.gguf": ["bartowski/NemoMix-Unleashed-12B-GGUF", MessagesFormatterType.MISTRAL],
34
+ "IceTea21EnergyDrinkRPV13.Q4_K_S.gguf": ["mradermacher/IceTea21EnergyDrinkRPV13-GGUF", MessagesFormatterType.MISTRAL],
35
+ "MegaBeam-Mistral-7B-512k-Q5_K_M.gguf": ["bartowski/MegaBeam-Mistral-7B-512k-GGUF", MessagesFormatterType.MISTRAL],
36
+ "Chronos-Gold-12B-1.0-Q4_K_M.gguf": ["bartowski/Chronos-Gold-12B-1.0-GGUF", MessagesFormatterType.MISTRAL],
37
+ "L3.1-Romes-Ninomos-Maxxing.Q5_K_M.gguf": ["mradermacher/L3.1-Romes-Ninomos-Maxxing-GGUF", MessagesFormatterType.LLAMA_3],
38
+ "mistral-nemo-minitron-8b-base-q4_k_m.gguf": ["Daemontatox/Mistral-NeMo-Minitron-8B-Base-Q4_K_M-GGUF", MessagesFormatterType.MISTRAL],
39
+ "Nokstella_coder-8B-model_stock.i1-Q4_K_S.gguf": ["mradermacher/Nokstella_coder-8B-model_stock-i1-GGUF", MessagesFormatterType.LLAMA_3],
40
  "storiecreative-q5_k_m.gguf": ["ClaudioItaly/StorieCreative-Q5_K_M-GGUF", MessagesFormatterType.MISTRAL],
41
  "L3.1-gramamax.Q5_K_M.gguf": ["mradermacher/L3.1-gramamax-GGUF", MessagesFormatterType.MISTRAL],
42
+ "Evolutionstory128.Q5_K_M.gguf": ["mradermacher/Evolutionstory128-GGUF", MessagesFormatterType.CHATML],
43
  "sellen-8b-model_stock-q4_k_m.gguf": ["DreadPoor/Sellen-8B-model_stock-Q4_K_M-GGUF", MessagesFormatterType.MISTRAL],
44
  "nokstella_coder-8b-model_stock-q4_k_m.gguf": ["DreadPoor/Nokstella_coder-8B-model_stock-Q4_K_M-GGUF", MessagesFormatterType.LLAMA_3],
45
+ "Ultra-Instruct-12B-Q4_K_M.gguf": ["bartowski/Ultra-Instruct-12B-GGUF", MessagesFormatterType.MISTRAL],
46
  "L3.1-Sithamo-v0.4-8B.q5_k_m.gguf": ["kromquant/L3.1-Siithamo-v0.4-8B-GGUFs", MessagesFormatterType.MISTRAL],
47
  "Berry-Spark-7B-Fix.Q5_K_M.gguf": ["mradermacher/Berry-Spark-7B-Fix-GGUF", MessagesFormatterType.OPEN_CHAT],
48
  "llama3.1-gutenberg-8B.Q4_K_S.gguf": ["mradermacher/llama3.1-gutenberg-8B-GGUF", MessagesFormatterType.LLAMA_3],
 
692
  llm = Llama(
693
  model_path=str(Path(f"{llm_models_dir}/{model}")),
694
  flash_attn=True,
695
+ n_gpu_layers=35, # 81
696
  n_batch=1024,
697
+ n_ctx=4096, #8192
698
+ n_threads=8,
699
  )
700
  provider = LlamaCppPythonProvider(llm)
701
 
 
787
  llm = Llama(
788
  model_path=str(Path(f"{llm_models_dir}/{model}")),
789
  flash_attn=True,
790
+ n_gpu_layers=35, # 81
791
  n_batch=1024,
792
+ n_ctx=4096, #8192
793
+ n_threads=8,
794
  )
795
  provider = LlamaCppPythonProvider(llm)
796