sync from github
src/backend/tasks/arena_hard/task.py
CHANGED
@@ -72,7 +72,7 @@ class ArenaHard(ConfigurableTask):
         super().__init__(config={"metadata": {"version": self.VERSION}})
         # these end tokens are hard coded because of the current limitaion of the llm-eval.
         # self.generation_kwargs = {"until": ["\n\n", "<unk>", "<|im_end|>", "</s>", "<|endoftext|>"], "max_length": 512}
-        self.generation_kwargs = {"until": ["</s>", "<|im_end|>"], "
+        self.generation_kwargs = {"until": ["</s>", "<|im_end|>"], "max_gen_toks": 4096}
         # self.generation_kwargs_sampling_number = 5 # the number of sampling for self-consistence
         # self.generation_kwargs_sampling = {
         #     "temperature": 0.99,
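This change hard-codes "</s>" and "<|im_end|>" as stop sequences and sets the generation budget to 4096 new tokens via max_gen_toks. A minimal sketch of how a generate-until style harness typically consumes such kwargs; the GenerationKwargs dataclass and truncate_at_stop helper below are illustrative assumptions, not code from this repo:

# Illustrative only: how "until" stop sequences and "max_gen_toks" are
# typically applied by a generate-until harness. The names below
# (GenerationKwargs, truncate_at_stop) are assumptions, not repo code.
from dataclasses import dataclass, field
from typing import List


@dataclass
class GenerationKwargs:
    until: List[str] = field(default_factory=lambda: ["</s>", "<|im_end|>"])
    max_gen_toks: int = 4096  # cap on newly generated tokens


def truncate_at_stop(completion: str, until: List[str]) -> str:
    # Cut the completion at the earliest stop sequence, if any occurs.
    cut = len(completion)
    for stop in until:
        idx = completion.find(stop)
        if idx != -1:
            cut = min(cut, idx)
    return completion[:cut]


if __name__ == "__main__":
    kwargs = GenerationKwargs()
    raw = "The capital of France is Paris.</s> trailing tokens"
    print(truncate_at_stop(raw, kwargs.until))
    # -> "The capital of France is Paris."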
src/display/utils.py
CHANGED
@@ -188,6 +188,7 @@ class InferenceFramework(Enum):
     HF_Chat = ModelDetails("hf-chat")
     VLLM = ModelDetails("vllm_moe")
     TRTLLM = ModelDetails("tensorrt_llm")
+    VLLM_FIX = ModelDetails("vllm_moe_fixbs")
     Unknown = ModelDetails("?")

     def to_str(self):
@@ -203,6 +204,8 @@ class InferenceFramework(Enum):
             return InferenceFramework.HF_Chat
         if inference_framework in ["vllm_moe"]:
             return InferenceFramework.VLLM
+        if inference_framework in ["vllm_moe_fixbs"]:
+            return InferenceFramework.VLLM_FIX
         return InferenceFramework.Unknown

 class GPUType(Enum):
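For context, the pattern being extended here is an Enum whose members wrap a display name, plus a string-dispatch lookup that falls back to Unknown. A minimal self-contained sketch, under the assumption that ModelDetails is a simple name wrapper (the real class may carry more fields):

# Illustrative sketch of the Enum-plus-lookup pattern extended by this
# diff. ModelDetails is simplified to a one-field dataclass here; the
# repo's version may differ.
from dataclasses import dataclass
from enum import Enum


@dataclass(frozen=True)
class ModelDetails:
    name: str


class InferenceFramework(Enum):
    VLLM = ModelDetails("vllm_moe")
    VLLM_FIX = ModelDetails("vllm_moe_fixbs")  # fixed-batch-size vLLM variant
    Unknown = ModelDetails("?")

    def to_str(self):
        return self.value.name

    @staticmethod
    def from_str(inference_framework: str) -> "InferenceFramework":
        # Map the raw request string back to an enum member,
        # defaulting to Unknown for unrecognized values.
        if inference_framework in ["vllm_moe"]:
            return InferenceFramework.VLLM
        if inference_framework in ["vllm_moe_fixbs"]:
            return InferenceFramework.VLLM_FIX
        return InferenceFramework.Unknown


if __name__ == "__main__":
    print(InferenceFramework.from_str("vllm_moe_fixbs").to_str())
    # -> vllm_moe_fixbs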