Update src/leaderboard/read_evals.py
src/leaderboard/read_evals.py
CHANGED
@@ -108,9 +108,33 @@ class EvalResult:
         except Exception:
             print(f"Could not find request file for {self.org}/{self.model} with precision {self.precision.value.name}")

+    # def to_dict(self):
+    #     """Converts the Eval Result to a dict compatible with our dataframe display"""
+    #     average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
+    #     data_dict = {
+    #         "eval_name": self.eval_name, # not a column, just a save name,
+    #         AutoEvalColumn.precision.name: self.precision.value.name,
+    #         AutoEvalColumn.model_type.name: self.model_type.value.name,
+    #         AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
+    #         AutoEvalColumn.weight_type.name: self.weight_type.value.name,
+    #         AutoEvalColumn.architecture.name: self.architecture,
+    #         AutoEvalColumn.model.name: make_clickable_model(self.full_model),
+    #         AutoEvalColumn.revision.name: self.revision,
+    #         AutoEvalColumn.average.name: average,
+    #         AutoEvalColumn.license.name: self.license,
+    #         AutoEvalColumn.likes.name: self.likes,
+    #         AutoEvalColumn.params.name: self.num_params,
+    #         AutoEvalColumn.still_on_hub.name: self.still_on_hub,
+    #     }
+
+    #     for task in Tasks:
+    #         data_dict[task.value.col_name] = self.results.get(task.value.benchmark, None)
+    #         print(f"Debug: {task.value.col_name} = {self.results.get(task.value.benchmark, 'N/A')}")
+
+    #     return data_dict
     def to_dict(self):
         """Converts the Eval Result to a dict compatible with our dataframe display"""
-        average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
+        average = sum([v for v in self.results.values() if v is not None]) / len(self.results)
         data_dict = {
             "eval_name": self.eval_name, # not a column, just a save name,
             AutoEvalColumn.precision.name: self.precision.value.name,
@@ -126,14 +150,19 @@ class EvalResult:
             AutoEvalColumn.params.name: self.num_params,
             AutoEvalColumn.still_on_hub.name: self.still_on_hub,
         }
-
+
         for task in Tasks:
-            data_dict[task.value.col_name] = self.results.get(task.value.benchmark, None)
-            print(f"Debug: {task.value.col_name} = {self.results.get(task.value.benchmark, 'N/A')}")
-
+            task_value = self.results.get(task.value.benchmark)
+            if task_value is not None:
+                data_dict[task.value.col_name] = task_value
+            else:
+                data_dict[task.value.col_name] = 'N/A'  # handle the case where no score exists
+            print(f"Debug: {task.value.col_name} = {data_dict[task.value.col_name]}")
+
         return data_dict


+
 def get_request_file_for_model(requests_path, model_name, precision):
     """Selects the correct request file for a given model. Only keeps runs tagged as FINISHED"""
     request_files = os.path.join(
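A minimal sketch of the behavior this change introduces, outside the EvalResult class: the average is now divided by len(self.results) instead of len(Tasks), and tasks without a score are shown as 'N/A' rather than None. The results dict and benchmark names below are hypothetical, for illustration only.

# Sketch of the updated per-task handling, using a made-up results dict.
results = {"benchmark_a": 0.72, "benchmark_b": None}
task_benchmarks = ["benchmark_a", "benchmark_b", "benchmark_c"]

# Average over non-None scores, divided by the number of result entries,
# as in the new to_dict() (the old version divided by len(Tasks)).
average = sum(v for v in results.values() if v is not None) / len(results)

data_dict = {"average": average}
for benchmark in task_benchmarks:
    task_value = results.get(benchmark)
    if task_value is not None:
        data_dict[benchmark] = task_value
    else:
        data_dict[benchmark] = "N/A"  # placeholder when no score exists
    print(f"Debug: {benchmark} = {data_dict[benchmark]}")

print(data_dict)
# {'average': 0.36, 'benchmark_a': 0.72, 'benchmark_b': 'N/A', 'benchmark_c': 'N/A'}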