John6666 committed on
Commit
4cba2ef
•
1 Parent(s): 8f20fda

Upload llmdolphin.py

Browse files
Files changed (1) hide show
  1. llmdolphin.py +12 -3
llmdolphin.py CHANGED
@@ -27,16 +27,25 @@ llm_models = {
27
  "Nemo-12B-Marlin-v5-Q4_K_M.gguf": ["starble-dev/Nemo-12B-Marlin-v5-GGUF", MessagesFormatterType.CHATML],
28
  "Nemo-12B-Marlin-v7.Q4_K_M.gguf": ["mradermacher/Nemo-12B-Marlin-v7-GGUF", MessagesFormatterType.MISTRAL],
29
  "NemoDori-v0.2-Upscaled.1-14B.Q4_K_M.gguf": ["mradermacher/NemoDori-v0.2-Upscaled.1-14B-GGUF", MessagesFormatterType.MISTRAL],
 
30
  "Fireball-Mistral-Nemo-Base-2407-sft-v2.2a.Q4_K_M.gguf": ["mradermacher/Fireball-Mistral-Nemo-Base-2407-sft-v2.2a-GGUF", MessagesFormatterType.MISTRAL],
 
31
  "Deutscher-Pantheon-12B.Q4_K_M.gguf": ["mradermacher/Deutscher-Pantheon-12B-GGUF", MessagesFormatterType.MISTRAL],
32
  "Trinas_Nectar-8B-model_stock.i1-Q4_K_M.gguf": ["mradermacher/Trinas_Nectar-8B-model_stock-i1-GGUF", MessagesFormatterType.MISTRAL],
 
 
 
 
 
33
  "NemoMix-Unleashed-12B-Q4_K_M.gguf": ["bartowski/NemoMix-Unleashed-12B-GGUF", MessagesFormatterType.MISTRAL],
34
  "IceTea21EnergyDrinkRPV13.Q4_K_S.gguf": ["mradermacher/IceTea21EnergyDrinkRPV13-GGUF", MessagesFormatterType.MISTRAL],
35
  "MegaBeam-Mistral-7B-512k-Q5_K_M.gguf": ["bartowski/MegaBeam-Mistral-7B-512k-GGUF", MessagesFormatterType.MISTRAL],
 
36
  "Chronos-Gold-12B-1.0-Q4_K_M.gguf": ["bartowski/Chronos-Gold-12B-1.0-GGUF", MessagesFormatterType.MISTRAL],
37
  "L3.1-Romes-Ninomos-Maxxing.Q5_K_M.gguf": ["mradermacher/L3.1-Romes-Ninomos-Maxxing-GGUF", MessagesFormatterType.LLAMA_3],
38
  "mistral-nemo-minitron-8b-base-q4_k_m.gguf": ["Daemontatox/Mistral-NeMo-Minitron-8B-Base-Q4_K_M-GGUF", MessagesFormatterType.MISTRAL],
39
  "Nokstella_coder-8B-model_stock.i1-Q4_K_S.gguf": ["mradermacher/Nokstella_coder-8B-model_stock-i1-GGUF", MessagesFormatterType.LLAMA_3],
 
40
  "storiecreative-q5_k_m.gguf": ["ClaudioItaly/StorieCreative-Q5_K_M-GGUF", MessagesFormatterType.MISTRAL],
41
  "L3.1-gramamax.Q5_K_M.gguf": ["mradermacher/L3.1-gramamax-GGUF", MessagesFormatterType.MISTRAL],
42
  "Evolutionstory128.Q5_K_M.gguf": ["mradermacher/Evolutionstory128-GGUF", MessagesFormatterType.CHATML],
@@ -668,7 +677,7 @@ def get_raw_prompt(msg: str):
668
  return re.sub(r'[*/:_"#\n]', ' ', ", ".join(m)).lower() if m else ""
669
 
670
 
671
- @spaces.GPU(duration=30)
672
  def dolphin_respond(
673
  message: str,
674
  history: list[tuple[str, str]],
@@ -761,7 +770,7 @@ def dolphin_parse(
761
  return ", ".join(prompts), gr.update(interactive=True), gr.update(interactive=True)
762
 
763
 
764
- @spaces.GPU(duration=30)
765
  def dolphin_respond_auto(
766
  message: str,
767
  history: list[tuple[str, str]],
@@ -788,7 +797,7 @@ def dolphin_respond_auto(
788
  flash_attn=True,
789
  n_gpu_layers=81, # 81
790
  n_batch=1024,
791
- n_ctx=4096, #8192
792
  )
793
  provider = LlamaCppPythonProvider(llm)
794
 
 
27
  "Nemo-12B-Marlin-v5-Q4_K_M.gguf": ["starble-dev/Nemo-12B-Marlin-v5-GGUF", MessagesFormatterType.CHATML],
28
  "Nemo-12B-Marlin-v7.Q4_K_M.gguf": ["mradermacher/Nemo-12B-Marlin-v7-GGUF", MessagesFormatterType.MISTRAL],
29
  "NemoDori-v0.2-Upscaled.1-14B.Q4_K_M.gguf": ["mradermacher/NemoDori-v0.2-Upscaled.1-14B-GGUF", MessagesFormatterType.MISTRAL],
30
+ "Fireball-12B-v1.0.i1-Q4_K_M.gguf": ["mradermacher/Fireball-12B-v1.0-i1-GGUF", MessagesFormatterType.MISTRAL],
31
  "Fireball-Mistral-Nemo-Base-2407-sft-v2.2a.Q4_K_M.gguf": ["mradermacher/Fireball-Mistral-Nemo-Base-2407-sft-v2.2a-GGUF", MessagesFormatterType.MISTRAL],
32
+ "StorieCreative.i1-Q4_K_S.gguf": ["mradermacher/StorieCreative-i1-GGUF", MessagesFormatterType.MISTRAL],
33
  "Deutscher-Pantheon-12B.Q4_K_M.gguf": ["mradermacher/Deutscher-Pantheon-12B-GGUF", MessagesFormatterType.MISTRAL],
34
  "Trinas_Nectar-8B-model_stock.i1-Q4_K_M.gguf": ["mradermacher/Trinas_Nectar-8B-model_stock-i1-GGUF", MessagesFormatterType.MISTRAL],
35
+ "Loki-v5.2.Q5_K_M.gguf": ["mradermacher/Loki-v5.2-GGUF", MessagesFormatterType.MISTRAL],
36
+ "Loki-v5.1.Q5_K_M.gguf": ["mradermacher/Loki-v5.1-GGUF", MessagesFormatterType.MISTRAL],
37
+ "GracieRP-freefallenLora-Gemma2-Inst-9B.i1-Q4_K_M.gguf": ["mradermacher/GracieRP-freefallenLora-Gemma2-Inst-9B-i1-GGUF", MessagesFormatterType.ALPACA],
38
+ "mistral-nemo-gutenberg-12B-v4.Q4_K_M.gguf": ["mradermacher/mistral-nemo-gutenberg-12B-v4-GGUF", MessagesFormatterType.MISTRAL],
39
+ "FunkyMerge-12b-0.1.Q4_K_M.gguf": ["mradermacher/FunkyMerge-12b-0.1-GGUF", MessagesFormatterType.MISTRAL],
40
  "NemoMix-Unleashed-12B-Q4_K_M.gguf": ["bartowski/NemoMix-Unleashed-12B-GGUF", MessagesFormatterType.MISTRAL],
41
  "IceTea21EnergyDrinkRPV13.Q4_K_S.gguf": ["mradermacher/IceTea21EnergyDrinkRPV13-GGUF", MessagesFormatterType.MISTRAL],
42
  "MegaBeam-Mistral-7B-512k-Q5_K_M.gguf": ["bartowski/MegaBeam-Mistral-7B-512k-GGUF", MessagesFormatterType.MISTRAL],
43
+ "azur-8b-model_stock-q4_k_m.gguf": ["DreadPoor/Azur-8B-model_stock-Q4_K_M-GGUF", MessagesFormatterType.LLAMA_3],
44
  "Chronos-Gold-12B-1.0-Q4_K_M.gguf": ["bartowski/Chronos-Gold-12B-1.0-GGUF", MessagesFormatterType.MISTRAL],
45
  "L3.1-Romes-Ninomos-Maxxing.Q5_K_M.gguf": ["mradermacher/L3.1-Romes-Ninomos-Maxxing-GGUF", MessagesFormatterType.LLAMA_3],
46
  "mistral-nemo-minitron-8b-base-q4_k_m.gguf": ["Daemontatox/Mistral-NeMo-Minitron-8B-Base-Q4_K_M-GGUF", MessagesFormatterType.MISTRAL],
47
  "Nokstella_coder-8B-model_stock.i1-Q4_K_S.gguf": ["mradermacher/Nokstella_coder-8B-model_stock-i1-GGUF", MessagesFormatterType.LLAMA_3],
48
+ "vtion_model_v1.Q5_K_M.gguf": ["mradermacher/vtion_model_v1-GGUF", MessagesFormatterType.LLAMA_3],
49
  "storiecreative-q5_k_m.gguf": ["ClaudioItaly/StorieCreative-Q5_K_M-GGUF", MessagesFormatterType.MISTRAL],
50
  "L3.1-gramamax.Q5_K_M.gguf": ["mradermacher/L3.1-gramamax-GGUF", MessagesFormatterType.MISTRAL],
51
  "Evolutionstory128.Q5_K_M.gguf": ["mradermacher/Evolutionstory128-GGUF", MessagesFormatterType.CHATML],
 
677
  return re.sub(r'[*/:_"#\n]', ' ', ", ".join(m)).lower() if m else ""
678
 
679
 
680
+ @spaces.GPU(duration=60)
681
  def dolphin_respond(
682
  message: str,
683
  history: list[tuple[str, str]],
 
770
  return ", ".join(prompts), gr.update(interactive=True), gr.update(interactive=True)
771
 
772
 
773
+ @spaces.GPU(duration=60)
774
  def dolphin_respond_auto(
775
  message: str,
776
  history: list[tuple[str, str]],
 
797
  flash_attn=True,
798
  n_gpu_layers=81, # 81
799
  n_batch=1024,
800
+ n_ctx=8192, #8192
801
  )
802
  provider = LlamaCppPythonProvider(llm)
803