Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Allow setting seq_len/size/dim for gated models
#121
by tomaarsen (HF staff) — opened
- app.py +16 -3
- model_meta.yaml +16 -0
- utils/model_size.py +1 -1
app.py
CHANGED
@@ -143,6 +143,10 @@ def get_dim_seq_size(model):
|
|
143 |
if not dim:
|
144 |
dim = config.get("hidden_dim", config.get("hidden_size", config.get("d_model", "")))
|
145 |
seq = config.get("n_positions", config.get("max_position_embeddings", config.get("n_ctx", config.get("seq_length", ""))))
|
|
|
|
|
|
|
|
|
146 |
# Get model file size without downloading. Parameters in million parameters and memory in GB
|
147 |
parameters, memory = get_model_parameters_memory(model)
|
148 |
return dim, seq, parameters, memory
|
@@ -244,13 +248,22 @@ def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_
|
|
244 |
# Model & at least one result
|
245 |
if len(out) > 1:
|
246 |
if add_emb_dim:
|
|
|
247 |
try:
|
248 |
-
# Fails on gated repos, so we only include scores for them
|
249 |
if "dim_seq_size" not in MODEL_INFOS[model.modelId] or refresh:
|
250 |
MODEL_INFOS[model.modelId]["dim_seq_size"] = list(get_dim_seq_size(model))
|
251 |
-
out["Embedding Dimensions"], out["Max Tokens"], out["Model Size (Million Parameters)"], out["Memory Usage (GB, fp32)"] = tuple(MODEL_INFOS[model.modelId]["dim_seq_size"])
|
252 |
except:
|
253 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
254 |
df_list.append(out)
|
255 |
if model.library_name == "sentence-transformers" or "sentence-transformers" in model.tags or "modules.json" in {file.rfilename for file in model.siblings}:
|
256 |
SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS.add(out["Model"])
|
|
|
143 |
if not dim:
|
144 |
dim = config.get("hidden_dim", config.get("hidden_size", config.get("d_model", "")))
|
145 |
seq = config.get("n_positions", config.get("max_position_embeddings", config.get("n_ctx", config.get("seq_length", ""))))
|
146 |
+
|
147 |
+
if dim == "" or seq == "":
|
148 |
+
raise Exception(f"Could not find dim or seq for model {model.modelId}")
|
149 |
+
|
150 |
# Get model file size without downloading. Parameters in million parameters and memory in GB
|
151 |
parameters, memory = get_model_parameters_memory(model)
|
152 |
return dim, seq, parameters, memory
|
|
|
248 |
# Model & at least one result
|
249 |
if len(out) > 1:
|
250 |
if add_emb_dim:
|
251 |
+
# The except clause triggers on gated repos, we can use external metadata for those
|
252 |
try:
|
|
|
253 |
if "dim_seq_size" not in MODEL_INFOS[model.modelId] or refresh:
|
254 |
MODEL_INFOS[model.modelId]["dim_seq_size"] = list(get_dim_seq_size(model))
|
|
|
255 |
except:
|
256 |
+
name_without_org = model.modelId.split("/")[-1]
|
257 |
+
# EXTERNAL_MODEL_TO_SIZE[name_without_org] refers to millions of parameters, so for memory usage
|
258 |
+
# we multiply by 1e6 to get just the number of parameters, then by 4 to get the number of bytes
|
259 |
+
# given fp32 precision (4 bytes per float), then divide by 1024**3 to get the number of GB
|
260 |
+
MODEL_INFOS[model.modelId]["dim_seq_size"] = (
|
261 |
+
EXTERNAL_MODEL_TO_DIM.get(name_without_org, ""),
|
262 |
+
EXTERNAL_MODEL_TO_SEQLEN.get(name_without_org, ""),
|
263 |
+
EXTERNAL_MODEL_TO_SIZE.get(name_without_org, ""),
|
264 |
+
round(EXTERNAL_MODEL_TO_SIZE[name_without_org] * 1e6 * 4 / 1024**3, 2) if name_without_org in EXTERNAL_MODEL_TO_SIZE else "",
|
265 |
+
)
|
266 |
+
out["Embedding Dimensions"], out["Max Tokens"], out["Model Size (Million Parameters)"], out["Memory Usage (GB, fp32)"] = tuple(MODEL_INFOS[model.modelId]["dim_seq_size"])
|
267 |
df_list.append(out)
|
268 |
if model.library_name == "sentence-transformers" or "sentence-transformers" in model.tags or "modules.json" in {file.rfilename for file in model.siblings}:
|
269 |
SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS.add(out["Model"])
|
model_meta.yaml
CHANGED
@@ -1211,6 +1211,22 @@ model_meta:
|
|
1211 |
is_external: true
|
1212 |
is_proprietary: false
|
1213 |
is_sentence_transformers_compatible: true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1214 |
models_to_skip:
|
1215 |
- michaelfeil/ct2fast-e5-large-v2
|
1216 |
- McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-unsup-simcse
|
|
|
1211 |
is_external: true
|
1212 |
is_proprietary: false
|
1213 |
is_sentence_transformers_compatible: true
|
1214 |
+
NV-Embed-v1:
|
1215 |
+
link: https://huggingface.co/nvidia/NV-Embed-v1
|
1216 |
+
seq_len: 32768
|
1217 |
+
size: 7851
|
1218 |
+
dim: 4096
|
1219 |
+
is_external: false
|
1220 |
+
is_proprietary: false
|
1221 |
+
is_sentence_transformers_compatible: false
|
1222 |
+
Linq-Embed-Mistral:
|
1223 |
+
link: https://huggingface.co/Linq-AI-Research/Linq-Embed-Mistral
|
1224 |
+
seq_len: 32768
|
1225 |
+
size: 7111
|
1226 |
+
dim: 4096
|
1227 |
+
is_external: false
|
1228 |
+
is_proprietary: false
|
1229 |
+
is_sentence_transformers_compatible: true
|
1230 |
models_to_skip:
|
1231 |
- michaelfeil/ct2fast-e5-large-v2
|
1232 |
- McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-unsup-simcse
|
utils/model_size.py
CHANGED
@@ -40,4 +40,4 @@ def get_model_parameters_memory(model_info: ModelInfo):
|
|
40 |
if ("metadata" in size) and ("total_size" in size["metadata"]):
|
41 |
return round(size["metadata"]["total_size"] / bytes_per_param / 1e6), round(size["metadata"]["total_size"] / 1024**3, 2)
|
42 |
|
43 |
-
|
|
|
40 |
if ("metadata" in size) and ("total_size" in size["metadata"]):
|
41 |
return round(size["metadata"]["total_size"] / bytes_per_param / 1e6), round(size["metadata"]["total_size"] / 1024**3, 2)
|
42 |
|
43 |
+
raise Exception(f"Could not find the model parameters for {model_info.id}")
|