a100 kh committed on
Commit
111a8d9
1 Parent(s): 7780e34
Files changed (2) hide show
  1. api_endpoints.json +26 -0
  2. local/local_setup +4 -0
api_endpoints.json CHANGED
@@ -58,6 +58,32 @@
58
  "text-arena": true,
59
  "vision-arena": false
60
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  "google/gemma-2-27b-it": {
62
  "model_name": "google/gemma-2-27b-it",
63
  "api_type": "openai-custom-deepinfra",
 
58
  "text-arena": true,
59
  "vision-arena": false
60
  },
61
+ "meta-llama/Meta-Llama-3.1-405B-Instruct": {
62
+ "model_name": "meta-llama/Meta-Llama-3.1-405B-Instruct",
63
+ "api_type": "openai-custom-deepinfra",
64
+ "api_base": "https://api.deepinfra.com/v1/openai",
65
+ "env_api_key": "DEEPINFRA_API_KEY",
66
+ "anony_only": false,
67
+ "recommended_config": {
68
+ "temperature": 0.7,
69
+ "top_p": 1.0
70
+ },
71
+ "text-arena": true,
72
+ "vision-arena": false
73
+ },
74
+ "NousResearch/Hermes-3-Llama-3.1-405B": {
75
+ "model_name": "NousResearch/Hermes-3-Llama-3.1-405B",
76
+ "api_type": "openai-custom-deepinfra",
77
+ "api_base": "https://api.deepinfra.com/v1/openai",
78
+ "env_api_key": "DEEPINFRA_API_KEY",
79
+ "anony_only": false,
80
+ "recommended_config": {
81
+ "temperature": 0.7,
82
+ "top_p": 1.0
83
+ },
84
+ "text-arena": true,
85
+ "vision-arena": false
86
+ },
87
  "google/gemma-2-27b-it": {
88
  "model_name": "google/gemma-2-27b-it",
89
  "api_type": "openai-custom-deepinfra",
local/local_setup CHANGED
@@ -31,6 +31,10 @@ export CUDA_VISIBLE_DEVICES=0
31
  export CUDA_VISIBLE_DEVICES=1
32
  python -m vllm.entrypoints.openai.api_server --model tokyotech-llm/Llama-3.1-Swallow-70B-Instruct-v0.1 --max-model-len 4096 --port 8019 --gpu-memory-utilization 0.6 --trust-remote-code --quantization bitsandbytes --load-format bitsandbytes --api-key $VLLM_API_KEY
33
 
 
 
 
 
34
  #########################
35
  #launch ngrok
36
  ngrok http http://localhost:8765
 
31
  export CUDA_VISIBLE_DEVICES=1
32
  python -m vllm.entrypoints.openai.api_server --model tokyotech-llm/Llama-3.1-Swallow-70B-Instruct-v0.1 --max-model-len 4096 --port 8019 --gpu-memory-utilization 0.6 --trust-remote-code --quantization bitsandbytes --load-format bitsandbytes --api-key $VLLM_API_KEY
33
 
34
+ #gemma
35
+ #export CUDA_VISIBLE_DEVICES=1
36
+ #../llama-server -m gemma-2-2B-jpn-it-BF16.gguf --n_gpu_layers 100 --port 8020
37
+
38
  #########################
39
  #launch ngrok
40
  ngrok http http://localhost:8765