a100 kh committed on
Commit
d2e1c79
1 Parent(s): 33a3d6d

add tanuki

Browse files
Files changed (3) hide show
  1. api_endpoints.json +13 -0
  2. local/local_setup +1 -1
  3. local/nginx +7 -6
api_endpoints.json CHANGED
@@ -194,6 +194,19 @@
194
  "text-arena": true,
195
  "vision-arena": false
196
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
197
  "llm-jp-3-13b-instruct-Q8_0.gguf": {
198
  "model_name": "llm-jp-3-13b-instruct-Q8_0.gguf",
199
  "api_type": "openai-llmjp3",
 
194
  "text-arena": true,
195
  "vision-arena": false
196
  },
197
+ "team-hatakeyama-phase2/Tanuki-8x8B-dpo-v1.0-AWQ": {
198
+ "model_name": "team-hatakeyama-phase2/Tanuki-8x8B-dpo-v1.0-AWQ",
199
+ "api_type": "openai-custom-tanuki",
200
+ "api_end": "Tanuki-8x8B-dpo",
201
+ "env_api_key": "VLLM_API_KEY",
202
+ "anony_only": false,
203
+ "recommended_config": {
204
+ "temperature": 0.7,
205
+ "top_p": 1.0
206
+ },
207
+ "text-arena": true,
208
+ "vision-arena": false
209
+ },
210
  "llm-jp-3-13b-instruct-Q8_0.gguf": {
211
  "model_name": "llm-jp-3-13b-instruct-Q8_0.gguf",
212
  "api_type": "openai-llmjp3",
local/local_setup CHANGED
@@ -37,7 +37,7 @@ python -m vllm.entrypoints.openai.api_server --model tokyotech-llm/Llama-3.1-Swa
37
 
38
  #tanuki 8x8b
39
  export CUDA_VISIBLE_DEVICES=1
40
- python -m vllm.entrypoints.openai.api_server --model team-hatakeyama-phase2/Tanuki-8x8B-dpo-v1.0-AWQ --max-model-len 4096 --port 8020 --gpu-memory-utilization 0.2 --trust-remote-code --quantization awq --api-key $VLLM_API_KEY
41
 
42
  #########################
43
  #launch ngrok
 
37
 
38
  #tanuki 8x8b
39
  export CUDA_VISIBLE_DEVICES=1
40
+ python -m vllm.entrypoints.openai.api_server --model team-hatakeyama-phase2/Tanuki-8x8B-dpo-v1.0-AWQ --max-model-len 4096 --port 8020 --gpu-memory-utilization 0.35 --trust-remote-code --quantization awq --api-key $VLLM_API_KEY
41
 
42
  #########################
43
  #launch ngrok
local/nginx CHANGED
@@ -1,14 +1,15 @@
1
  #sudo vi /etc/nginx/sites-available/default
 
2
  #sudo systemctl restart nginx
3
 
4
 
5
  server {
6
- listen 8765; # 一つのポートでまとめる
7
 
8
- location /swallow70/ {proxy_pass http://localhost:8019/v1/;}
9
- location /llm-jp-13b/ {proxy_pass http://localhost:8016/v1/;}
10
- location /Tanuki-8B-dpo/ {proxy_pass http://localhost:8012/v1/;}
11
- location /calm3-22b-chat/ {proxy_pass http://localhost:8011/v1/;}
12
- location /Swallow-8B/ {proxy_pass http://localhost:8010/v1/;}
13
  }
14
 
 
1
  #sudo vi /etc/nginx/sites-available/default
2
+ #sudo systemctl stop nginx
3
  #sudo systemctl restart nginx
4
 
5
 
6
  server {
7
+ listen 8765; # 一つのポートでまとめる
8
 
9
+ location /swallow70/ {proxy_pass http://localhost:8019/v1/;}
10
+ location /llm-jp-13b/ {proxy_pass http://localhost:8016/v1/;}
11
+ location /Tanuki-8B-dpo/ {proxy_pass http://localhost:8012/v1/;}
12
+ location /calm3-22b-chat/ {proxy_pass http://localhost:8011/v1/;}
13
+ location /Swallow-8B/ {proxy_pass http://localhost:8010/v1/;}
14
  }
15