kh committed
Commit • 111a8d9
Parent(s): 7780e34

endpoint

Files changed:
- api_endpoints.json +26 -0
- local/local_setup +4 -0
api_endpoints.json CHANGED
@@ -58,6 +58,32 @@
     "text-arena": true,
     "vision-arena": false
   },
+  "meta-llama/Meta-Llama-3.1-405B-Instruct": {
+    "model_name": "meta-llama/Meta-Llama-3.1-405B-Instruct",
+    "api_type": "openai-custom-deepinfra",
+    "api_base": "https://api.deepinfra.com/v1/openai",
+    "env_api_key": "DEEPINFRA_API_KEY",
+    "anony_only": false,
+    "recommended_config": {
+      "temperature": 0.7,
+      "top_p": 1.0
+    },
+    "text-arena": true,
+    "vision-arena": false
+  },
+  "NousResearch/Hermes-3-Llama-3.1-405B": {
+    "model_name": "NousResearch/Hermes-3-Llama-3.1-405B",
+    "api_type": "openai-custom-deepinfra",
+    "api_base": "https://api.deepinfra.com/v1/openai",
+    "env_api_key": "DEEPINFRA_API_KEY",
+    "anony_only": false,
+    "recommended_config": {
+      "temperature": 0.7,
+      "top_p": 1.0
+    },
+    "text-arena": true,
+    "vision-arena": false
+  },
   "google/gemma-2-27b-it": {
     "model_name": "google/gemma-2-27b-it",
     "api_type": "openai-custom-deepinfra",
local/local_setup CHANGED
@@ -31,6 +31,10 @@ export CUDA_VISIBLE_DEVICES=0
 export CUDA_VISIBLE_DEVICES=1
 python -m vllm.entrypoints.openai.api_server --model tokyotech-llm/Llama-3.1-Swallow-70B-Instruct-v0.1 --max-model-len 4096 --port 8019 --gpu-memory-utilization 0.6 --trust-remote-code --quantization bitsandbytes --load-format bitsandbytes --api-key $VLLM_API_KEY
 
+#gemma
+#export CUDA_VISIBLE_DEVICES=1
+#../llama-server -m gemma-2-2B-jpn-it-BF16.gguf --n_gpu_layers 100 --port 8020
+
 #########################
 #launch ngrok
 ngrok http http://localhost:8765
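The vLLM command above exposes the same OpenAI-compatible API locally on port 8019, gated by $VLLM_API_KEY, so the served Swallow model can be queried the same way as the hosted DeepInfra endpoints. A small sketch under those assumptions (the prompt and max_tokens values are illustrative):

import os

from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:8019/v1",  # vLLM's OpenAI-compatible route
    api_key=os.environ["VLLM_API_KEY"],   # must match --api-key at launch
)

resp = client.chat.completions.create(
    model="tokyotech-llm/Llama-3.1-Swallow-70B-Instruct-v0.1",
    messages=[{"role": "user", "content": "Hello!"}],
    max_tokens=128,  # keep responses well under --max-model-len 4096
)
print(resp.choices[0].message.content)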