Spaces:
Running
on
Zero
Running
on
Zero
Upload llmdolphin.py
Browse files- llmdolphin.py +16 -4
llmdolphin.py
CHANGED
@@ -28,11 +28,21 @@ llm_models = {
|
|
28 |
"Nemo-12B-Marlin-v7.Q4_K_M.gguf": ["mradermacher/Nemo-12B-Marlin-v7-GGUF", MessagesFormatterType.MISTRAL],
|
29 |
"NemoDori-v0.2-Upscaled.1-14B.Q4_K_M.gguf": ["mradermacher/NemoDori-v0.2-Upscaled.1-14B-GGUF", MessagesFormatterType.MISTRAL],
|
30 |
"Fireball-Mistral-Nemo-Base-2407-sft-v2.2a.Q4_K_M.gguf": ["mradermacher/Fireball-Mistral-Nemo-Base-2407-sft-v2.2a-GGUF", MessagesFormatterType.MISTRAL],
|
|
|
31 |
"Trinas_Nectar-8B-model_stock.i1-Q4_K_M.gguf": ["mradermacher/Trinas_Nectar-8B-model_stock-i1-GGUF", MessagesFormatterType.MISTRAL],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
"storiecreative-q5_k_m.gguf": ["ClaudioItaly/StorieCreative-Q5_K_M-GGUF", MessagesFormatterType.MISTRAL],
|
33 |
"L3.1-gramamax.Q5_K_M.gguf": ["mradermacher/L3.1-gramamax-GGUF", MessagesFormatterType.MISTRAL],
|
|
|
34 |
"sellen-8b-model_stock-q4_k_m.gguf": ["DreadPoor/Sellen-8B-model_stock-Q4_K_M-GGUF", MessagesFormatterType.MISTRAL],
|
35 |
"nokstella_coder-8b-model_stock-q4_k_m.gguf": ["DreadPoor/Nokstella_coder-8B-model_stock-Q4_K_M-GGUF", MessagesFormatterType.LLAMA_3],
|
|
|
36 |
"L3.1-Sithamo-v0.4-8B.q5_k_m.gguf": ["kromquant/L3.1-Siithamo-v0.4-8B-GGUFs", MessagesFormatterType.MISTRAL],
|
37 |
"Berry-Spark-7B-Fix.Q5_K_M.gguf": ["mradermacher/Berry-Spark-7B-Fix-GGUF", MessagesFormatterType.OPEN_CHAT],
|
38 |
"llama3.1-gutenberg-8B.Q4_K_S.gguf": ["mradermacher/llama3.1-gutenberg-8B-GGUF", MessagesFormatterType.LLAMA_3],
|
@@ -682,9 +692,10 @@ def dolphin_respond(
|
|
682 |
llm = Llama(
|
683 |
model_path=str(Path(f"{llm_models_dir}/{model}")),
|
684 |
flash_attn=True,
|
685 |
-
n_gpu_layers=81
|
686 |
n_batch=1024,
|
687 |
-
n_ctx=8192
|
|
|
688 |
)
|
689 |
provider = LlamaCppPythonProvider(llm)
|
690 |
|
@@ -776,9 +787,10 @@ def dolphin_respond_auto(
|
|
776 |
llm = Llama(
|
777 |
model_path=str(Path(f"{llm_models_dir}/{model}")),
|
778 |
flash_attn=True,
|
779 |
-
n_gpu_layers=81
|
780 |
n_batch=1024,
|
781 |
-
n_ctx=8192
|
|
|
782 |
)
|
783 |
provider = LlamaCppPythonProvider(llm)
|
784 |
|
|
|
28 |
"Nemo-12B-Marlin-v7.Q4_K_M.gguf": ["mradermacher/Nemo-12B-Marlin-v7-GGUF", MessagesFormatterType.MISTRAL],
|
29 |
"NemoDori-v0.2-Upscaled.1-14B.Q4_K_M.gguf": ["mradermacher/NemoDori-v0.2-Upscaled.1-14B-GGUF", MessagesFormatterType.MISTRAL],
|
30 |
"Fireball-Mistral-Nemo-Base-2407-sft-v2.2a.Q4_K_M.gguf": ["mradermacher/Fireball-Mistral-Nemo-Base-2407-sft-v2.2a-GGUF", MessagesFormatterType.MISTRAL],
|
31 |
+
"Deutscher-Pantheon-12B.Q4_K_M.gguf": ["mradermacher/Deutscher-Pantheon-12B-GGUF", MessagesFormatterType.MISTRAL],
|
32 |
"Trinas_Nectar-8B-model_stock.i1-Q4_K_M.gguf": ["mradermacher/Trinas_Nectar-8B-model_stock-i1-GGUF", MessagesFormatterType.MISTRAL],
|
33 |
+
"NemoMix-Unleashed-12B-Q4_K_M.gguf": ["bartowski/NemoMix-Unleashed-12B-GGUF", MessagesFormatterType.MISTRAL],
|
34 |
+
"IceTea21EnergyDrinkRPV13.Q4_K_S.gguf": ["mradermacher/IceTea21EnergyDrinkRPV13-GGUF", MessagesFormatterType.MISTRAL],
|
35 |
+
"MegaBeam-Mistral-7B-512k-Q5_K_M.gguf": ["bartowski/MegaBeam-Mistral-7B-512k-GGUF", MessagesFormatterType.MISTRAL],
|
36 |
+
"Chronos-Gold-12B-1.0-Q4_K_M.gguf": ["bartowski/Chronos-Gold-12B-1.0-GGUF", MessagesFormatterType.MISTRAL],
|
37 |
+
"L3.1-Romes-Ninomos-Maxxing.Q5_K_M.gguf": ["mradermacher/L3.1-Romes-Ninomos-Maxxing-GGUF", MessagesFormatterType.LLAMA_3],
|
38 |
+
"mistral-nemo-minitron-8b-base-q4_k_m.gguf": ["Daemontatox/Mistral-NeMo-Minitron-8B-Base-Q4_K_M-GGUF", MessagesFormatterType.MISTRAL],
|
39 |
+
"Nokstella_coder-8B-model_stock.i1-Q4_K_S.gguf": ["mradermacher/Nokstella_coder-8B-model_stock-i1-GGUF", MessagesFormatterType.LLAMA_3],
|
40 |
"storiecreative-q5_k_m.gguf": ["ClaudioItaly/StorieCreative-Q5_K_M-GGUF", MessagesFormatterType.MISTRAL],
|
41 |
"L3.1-gramamax.Q5_K_M.gguf": ["mradermacher/L3.1-gramamax-GGUF", MessagesFormatterType.MISTRAL],
|
42 |
+
"Evolutionstory128.Q5_K_M.gguf": ["mradermacher/Evolutionstory128-GGUF", MessagesFormatterType.CHATML],
|
43 |
"sellen-8b-model_stock-q4_k_m.gguf": ["DreadPoor/Sellen-8B-model_stock-Q4_K_M-GGUF", MessagesFormatterType.MISTRAL],
|
44 |
"nokstella_coder-8b-model_stock-q4_k_m.gguf": ["DreadPoor/Nokstella_coder-8B-model_stock-Q4_K_M-GGUF", MessagesFormatterType.LLAMA_3],
|
45 |
+
"Ultra-Instruct-12B-Q4_K_M.gguf": ["bartowski/Ultra-Instruct-12B-GGUF", MessagesFormatterType.MISTRAL],
|
46 |
"L3.1-Sithamo-v0.4-8B.q5_k_m.gguf": ["kromquant/L3.1-Siithamo-v0.4-8B-GGUFs", MessagesFormatterType.MISTRAL],
|
47 |
"Berry-Spark-7B-Fix.Q5_K_M.gguf": ["mradermacher/Berry-Spark-7B-Fix-GGUF", MessagesFormatterType.OPEN_CHAT],
|
48 |
"llama3.1-gutenberg-8B.Q4_K_S.gguf": ["mradermacher/llama3.1-gutenberg-8B-GGUF", MessagesFormatterType.LLAMA_3],
|
|
|
692 |
llm = Llama(
|
693 |
model_path=str(Path(f"{llm_models_dir}/{model}")),
|
694 |
flash_attn=True,
|
695 |
+
n_gpu_layers=35, # 81
|
696 |
n_batch=1024,
|
697 |
+
n_ctx=4096, #8192
|
698 |
+
n_threads=8,
|
699 |
)
|
700 |
provider = LlamaCppPythonProvider(llm)
|
701 |
|
|
|
787 |
llm = Llama(
|
788 |
model_path=str(Path(f"{llm_models_dir}/{model}")),
|
789 |
flash_attn=True,
|
790 |
+
n_gpu_layers=35, # 81
|
791 |
n_batch=1024,
|
792 |
+
n_ctx=4096, #8192
|
793 |
+
n_threads=8,
|
794 |
)
|
795 |
provider = LlamaCppPythonProvider(llm)
|
796 |
|