kh committed
Commit • 111a8d9
Parent(s): 7780e34

endpoint

Files changed:
- api_endpoints.json +26 -0
- local/local_setup +4 -0
api_endpoints.json CHANGED
@@ -58,6 +58,32 @@
     "text-arena": true,
     "vision-arena": false
   },
+  "meta-llama/Meta-Llama-3.1-405B-Instruct": {
+    "model_name": "meta-llama/Meta-Llama-3.1-405B-Instruct",
+    "api_type": "openai-custom-deepinfra",
+    "api_base": "https://api.deepinfra.com/v1/openai",
+    "env_api_key": "DEEPINFRA_API_KEY",
+    "anony_only": false,
+    "recommended_config": {
+      "temperature": 0.7,
+      "top_p": 1.0
+    },
+    "text-arena": true,
+    "vision-arena": false
+  },
+  "NousResearch/Hermes-3-Llama-3.1-405B": {
+    "model_name": "NousResearch/Hermes-3-Llama-3.1-405B",
+    "api_type": "openai-custom-deepinfra",
+    "api_base": "https://api.deepinfra.com/v1/openai",
+    "env_api_key": "DEEPINFRA_API_KEY",
+    "anony_only": false,
+    "recommended_config": {
+      "temperature": 0.7,
+      "top_p": 1.0
+    },
+    "text-arena": true,
+    "vision-arena": false
+  },
   "google/gemma-2-27b-it": {
     "model_name": "google/gemma-2-27b-it",
     "api_type": "openai-custom-deepinfra",
local/local_setup CHANGED
@@ -31,6 +31,10 @@ export CUDA_VISIBLE_DEVICES=0
 export CUDA_VISIBLE_DEVICES=1
 python -m vllm.entrypoints.openai.api_server --model tokyotech-llm/Llama-3.1-Swallow-70B-Instruct-v0.1 --max-model-len 4096 --port 8019 --gpu-memory-utilization 0.6 --trust-remote-code --quantization bitsandbytes --load-format bitsandbytes --api-key $VLLM_API_KEY
 
+#gemma
+#export CUDA_VISIBLE_DEVICES=1
+#../llama-server -m gemma-2-2B-jpn-it-BF16.gguf --n_gpu_layers 100 --port 8020
+
 #########################
 #launch ngrok
 ngrok http http://localhost:8765
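The vLLM command above exposes the same OpenAI-compatible API locally on port 8019, gated by $VLLM_API_KEY, so the served Swallow model can be queried the same way as the hosted DeepInfra endpoints. A small sketch under those assumptions (the prompt and max_tokens values are illustrative):

import os

from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:8019/v1",  # vLLM's OpenAI-compatible route
    api_key=os.environ["VLLM_API_KEY"],   # must match --api-key at launch
)

resp = client.chat.completions.create(
    model="tokyotech-llm/Llama-3.1-Swallow-70B-Instruct-v0.1",
    messages=[{"role": "user", "content": "Hello!"}],
    max_tokens=128,  # keep responses well under --max-model-len 4096
)
print(resp.choices[0].message.content)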