a100 kh committed
Commit 33a3d6d
Parent(s): 111a8d9
endpoint

Files changed:
- api_endpoints.json +12 -0
- local/local_setup +4 -0
api_endpoints.json CHANGED
@@ -131,6 +131,18 @@
         "text-arena": true,
         "vision-arena": false
     },
+    "gpt-4-turbo-2024-04-09": {
+        "model_name": "gpt-4-turbo-2024-04-09",
+        "api_type": "openai",
+        "api_base": "https://api.openai.com/v1",
+        "anony_only": false,
+        "recommended_config": {
+            "temperature": 0.7,
+            "top_p": 1.0
+        },
+        "text-arena": true,
+        "vision-arena": false
+    },
     "gpt-4o-mini-2024-07-18": {
         "model_name": "gpt-4o-mini-2024-07-18",
         "api_type": "openai",
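For context, a minimal sketch (not code from this Space) of how an entry like the one added above could be consumed. It assumes api_endpoints.json maps each model key to its config at the top level and that the endpoint is called through the official openai Python client, with sampling parameters taken from recommended_config; the actual wiring in the repo may differ.

import json
import os

from openai import OpenAI  # assumes the openai>=1.0 client; not pinned by this repo

# Load the endpoint registry edited in this commit.
with open("api_endpoints.json") as f:
    endpoints = json.load(f)

# The entry added above (assumes the file's top level is keyed by model id).
cfg = endpoints["gpt-4-turbo-2024-04-09"]

client = OpenAI(base_url=cfg["api_base"], api_key=os.environ["OPENAI_API_KEY"])

resp = client.chat.completions.create(
    model=cfg["model_name"],
    messages=[{"role": "user", "content": "Hello!"}],
    temperature=cfg["recommended_config"]["temperature"],
    top_p=cfg["recommended_config"]["top_p"],
)
print(resp.choices[0].message.content)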
local/local_setup CHANGED
@@ -35,6 +35,10 @@ python -m vllm.entrypoints.openai.api_server --model tokyotech-llm/Llama-3.1-Swa
 #export CUDA_VISIBLE_DEVICES=1
 #../llama-server -m gemma-2-2B-jpn-it-BF16.gguf --n_gpu_layers 100 --port 8020
 
+#tanuki 8x8b
+export CUDA_VISIBLE_DEVICES=1
+python -m vllm.entrypoints.openai.api_server --model team-hatakeyama-phase2/Tanuki-8x8B-dpo-v1.0-AWQ --max-model-len 4096 --port 8020 --gpu-memory-utilization 0.2 --trust-remote-code --quantization awq --api-key $VLLM_API_KEY
+
 #########################
 #launch ngrok
 ngrok http http://localhost:8765
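Once the vllm.entrypoints.openai.api_server command added above is running, the Tanuki model is served through vLLM's OpenAI-compatible API on port 8020, protected by the key passed via --api-key. A small smoke-test sketch (not part of local_setup) using the openai client against that server:

import os

from openai import OpenAI

# Point the client at the local vLLM server started in local_setup.
client = OpenAI(
    base_url="http://localhost:8020/v1",
    api_key=os.environ["VLLM_API_KEY"],  # same key passed to --api-key
)

resp = client.chat.completions.create(
    model="team-hatakeyama-phase2/Tanuki-8x8B-dpo-v1.0-AWQ",
    messages=[{"role": "user", "content": "Hello!"}],
    max_tokens=128,
)
print(resp.choices[0].message.content)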