a100 kh committed on
Commit
33a3d6d
1 Parent(s): 111a8d9
Files changed (2) hide show
  1. api_endpoints.json +12 -0
  2. local/local_setup +4 -0
api_endpoints.json CHANGED
@@ -131,6 +131,18 @@
131
  "text-arena": true,
132
  "vision-arena": false
133
  },
 
 
 
 
 
 
 
 
 
 
 
 
134
  "gpt-4o-mini-2024-07-18": {
135
  "model_name": "gpt-4o-mini-2024-07-18",
136
  "api_type": "openai",
 
131
  "text-arena": true,
132
  "vision-arena": false
133
  },
134
+ "gpt-4-turbo-2024-04-09": {
135
+ "model_name": "gpt-4-turbo-2024-04-09",
136
+ "api_type": "openai",
137
+ "api_base": "https://api.openai.com/v1",
138
+ "anony_only": false,
139
+ "recommended_config": {
140
+ "temperature": 0.7,
141
+ "top_p": 1.0
142
+ },
143
+ "text-arena": true,
144
+ "vision-arena": false
145
+ },
146
  "gpt-4o-mini-2024-07-18": {
147
  "model_name": "gpt-4o-mini-2024-07-18",
148
  "api_type": "openai",
local/local_setup CHANGED
@@ -35,6 +35,10 @@ python -m vllm.entrypoints.openai.api_server --model tokyotech-llm/Llama-3.1-Swa
35
  #export CUDA_VISIBLE_DEVICES=1
36
  #../llama-server -m gemma-2-2B-jpn-it-BF16.gguf --n_gpu_layers 100 --port 8020
37
 
 
 
 
 
38
  #########################
39
  #launch ngrok
40
  ngrok http http://localhost:8765
 
35
  #export CUDA_VISIBLE_DEVICES=1
36
  #../llama-server -m gemma-2-2B-jpn-it-BF16.gguf --n_gpu_layers 100 --port 8020
37
 
38
+ #tanuki 8x8b
39
+ export CUDA_VISIBLE_DEVICES=1
40
+ python -m vllm.entrypoints.openai.api_server --model team-hatakeyama-phase2/Tanuki-8x8B-dpo-v1.0-AWQ --max-model-len 4096 --port 8020 --gpu-memory-utilization 0.2 --trust-remote-code --quantization awq --api-key $VLLM_API_KEY
41
+
42
  #########################
43
  #launch ngrok
44
  ngrok http http://localhost:8765