Add SetFit model
Browse files
- README.md +292 -223
- config.json +2 -2
- config_sentence_transformers.json +3 -3
- model.safetensors +2 -2
- model_head.pkl +2 -2
- sentence_bert_config.json +1 -1
- tokenizer.json +1 -1
- tokenizer_config.json +1 -1
README.md
CHANGED
@@ -5,22 +5,19 @@ tags:
|
|
5 |
- sentence-transformers
|
6 |
- text-classification
|
7 |
- generated_from_setfit_trainer
|
8 |
-
base_model: sentence-transformers/all-MiniLM-
|
9 |
metrics:
|
10 |
- accuracy
|
11 |
widget:
|
12 |
-
- text:
|
13 |
-
- text:
|
14 |
-
|
15 |
-
- text:
|
16 |
-
- text:
|
17 |
-
impacts compared to those of hydroelectric power?
|
18 |
-
- text: Quels est le point essentiel à retenir pour maximiser l'efficacité et les
|
19 |
-
bénéfices des réunions virtuelles
|
20 |
pipeline_tag: text-classification
|
21 |
inference: true
|
22 |
model-index:
|
23 |
-
- name: SetFit with sentence-transformers/all-MiniLM-
|
24 |
results:
|
25 |
- task:
|
26 |
type: text-classification
|
@@ -31,13 +28,13 @@ model-index:
|
|
31 |
split: test
|
32 |
metrics:
|
33 |
- type: accuracy
|
34 |
-
value: 0.
|
35 |
name: Accuracy
|
36 |
---
|
37 |
|
38 |
-
# SetFit with sentence-transformers/all-MiniLM-
|
39 |
|
40 |
-
This is a [SetFit](https://github.com/huggingface/setfit) model that can be used for Text Classification. This SetFit model uses [sentence-transformers/all-MiniLM-
|
41 |
|
42 |
The model has been trained using an efficient few-shot learning technique that involves:
|
43 |
|
@@ -48,10 +45,10 @@ The model has been trained using an efficient few-shot learning technique that i
|
|
48 |
|
49 |
### Model Description
|
50 |
- **Model Type:** SetFit
|
51 |
-
- **Sentence Transformer body:** [sentence-transformers/all-MiniLM-
|
52 |
- **Classification head:** a [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance
|
53 |
-
- **Maximum Sequence Length:**
|
54 |
-
- **Number of Classes:**
|
55 |
<!-- - **Training Dataset:** [Unknown](https://huggingface.co/datasets/unknown) -->
|
56 |
<!-- - **Language:** Unknown -->
|
57 |
<!-- - **License:** Unknown -->
|
@@ -63,19 +60,20 @@ The model has been trained using an efficient few-shot learning technique that i
|
|
63 |
- **Blogpost:** [SetFit: Efficient Few-Shot Learning Without Prompts](https://huggingface.co/blog/setfit)
|
64 |
|
65 |
### Model Labels
|
66 |
-
| Label | Examples
|
67 |
-
|
68 |
-
| sub_queries | <ul><li>
|
69 |
-
|
|
70 |
-
| exchange | <ul><li>'Pourriez-vous
|
71 |
-
|
|
|
|
72 |
|
73 |
## Evaluation
|
74 |
|
75 |
### Metrics
|
76 |
| Label | Accuracy |
|
77 |
|:--------|:---------|
|
78 |
-
| **all** | 0.
|
79 |
|
80 |
## Uses
|
81 |
|
@@ -95,7 +93,7 @@ from setfit import SetFitModel
|
|
95 |
# Download from the 🤗 Hub
|
96 |
model = SetFitModel.from_pretrained("egis-group/router_mini_lm_l6")
|
97 |
# Run inference
|
98 |
-
preds = model("
|
99 |
```
|
100 |
|
101 |
<!--
|
@@ -127,7 +125,7 @@ preds = model("Qu'est-ce que la biodiversité ?")
|
|
127 |
### Training Set Metrics
|
128 |
| Training set | Min | Median | Max |
|
129 |
|:-------------|:----|:--------|:----|
|
130 |
-
| Word count |
|
131 |
|
132 |
| Label | Training Sample Count |
|
133 |
|:---------|:----------------------|
|
@@ -154,209 +152,280 @@ preds = model("Qu'est-ce que la biodiversité ?")
|
|
154 |
### Training Results
|
155 |
| Epoch | Step | Training Loss | Validation Loss |
|
156 |
|:-------:|:--------:|:-------------:|:---------------:|
|
157 |
-
| 0.
|
158 |
-
| 0.
|
159 |
-
| 0.
|
160 |
-
| 0.
|
161 |
-
| 0.
|
162 |
-
| 0.
|
163 |
-
| 0.
|
164 |
-
| 0.
|
165 |
-
| 0.
|
166 |
-
| 0.
|
167 |
-
| 0.
|
168 |
-
| 0.
|
169 |
-
| 0.
|
170 |
-
| 0.
|
171 |
-
| 0.
|
172 |
-
| 0.
|
173 |
-
| 0.
|
174 |
-
| 0.
|
175 |
-
| 0.
|
176 |
-
| 0.
|
177 |
-
| 0.
|
178 |
-
| 0.
|
179 |
-
| 0.
|
180 |
-
| 0.
|
181 |
-
| 0.
|
182 |
-
| 0.
|
183 |
-
| 0.
|
184 |
-
| 0.
|
185 |
-
| 0.
|
186 |
-
| 0.
|
187 |
-
| 0.
|
188 |
-
| 0.
|
189 |
-
| 0.
|
190 |
-
| 0.
|
191 |
-
| 0.
|
192 |
-
| 0.
|
193 |
-
| 0.
|
194 |
-
| 0.
|
195 |
-
| 0.
|
196 |
-
| 0.
|
197 |
-
| 0.
|
198 |
-
| 0.
|
199 |
-
| 0.
|
200 |
-
| 0.
|
201 |
-
| 0.
|
202 |
-
| 0.
|
203 |
-
| 0.
|
204 |
-
| 0.
|
205 |
-
| 0.
|
206 |
-
|
|
207 |
-
|
|
208 |
-
|
|
209 |
-
|
|
210 |
-
|
|
211 |
-
|
|
212 |
-
|
|
213 |
-
|
|
214 |
-
|
|
215 |
-
|
|
216 |
-
|
|
217 |
-
|
|
218 |
-
|
|
219 |
-
|
|
220 |
-
|
|
221 |
-
|
|
222 |
-
|
|
223 |
-
| 1.
|
224 |
-
| 1.
|
225 |
-
| 1.
|
226 |
-
| 1.
|
227 |
-
| 1.
|
228 |
-
| 1.
|
229 |
-
| 1.
|
230 |
-
| 1.
|
231 |
-
| 1.
|
232 |
-
| 1.
|
233 |
-
| 1.
|
234 |
-
| 1.
|
235 |
-
| 1.
|
236 |
-
| 1.
|
237 |
-
| 1.
|
238 |
-
| 1.
|
239 |
-
| 1.
|
240 |
-
| 1.
|
241 |
-
| 1.
|
242 |
-
| 1.
|
243 |
-
| 1.
|
244 |
-
| 1.
|
245 |
-
| 1.
|
246 |
-
| 1.
|
247 |
-
| 1.
|
248 |
-
| 1.
|
249 |
-
| 1.
|
250 |
-
| 1.
|
251 |
-
| 1.
|
252 |
-
| 1.
|
253 |
-
| 1.
|
254 |
-
| 1.
|
255 |
-
|
|
256 |
-
|
|
257 |
-
|
|
258 |
-
|
|
259 |
-
|
|
260 |
-
|
|
261 |
-
|
|
262 |
-
|
|
263 |
-
|
|
264 |
-
|
|
265 |
-
|
|
266 |
-
|
|
267 |
-
|
|
268 |
-
|
|
269 |
-
|
|
270 |
-
|
|
271 |
-
|
|
272 |
-
|
|
273 |
-
|
|
274 |
-
|
|
275 |
-
|
|
276 |
-
|
|
277 |
-
|
|
278 |
-
|
|
279 |
-
|
|
280 |
-
|
|
281 |
-
|
|
282 |
-
|
|
283 |
-
|
|
284 |
-
|
|
285 |
-
|
|
286 |
-
|
|
287 |
-
|
|
288 |
-
|
|
289 |
-
|
|
290 |
-
| 2.
|
291 |
-
| 2.
|
292 |
-
| 2.
|
293 |
-
| 2.
|
294 |
-
| 2.
|
295 |
-
| 2.
|
296 |
-
| 2.
|
297 |
-
| 2.
|
298 |
-
| 2.
|
299 |
-
| 2.
|
300 |
-
| 2.
|
301 |
-
| 2.
|
302 |
-
| 2.
|
303 |
-
| 2.
|
304 |
-
|
|
305 |
-
|
|
306 |
-
|
|
307 |
-
|
|
308 |
-
|
|
309 |
-
|
|
310 |
-
|
|
311 |
-
|
|
312 |
-
|
|
313 |
-
|
|
314 |
-
|
|
315 |
-
|
|
316 |
-
|
|
317 |
-
|
|
318 |
-
|
|
319 |
-
|
|
320 |
-
|
|
321 |
-
|
|
322 |
-
|
|
323 |
-
|
|
324 |
-
|
|
325 |
-
|
|
326 |
-
|
|
327 |
-
|
|
328 |
-
|
|
329 |
-
|
|
330 |
-
|
|
331 |
-
|
|
332 |
-
|
|
333 |
-
|
|
334 |
-
|
|
335 |
-
|
|
336 |
-
|
|
337 |
-
|
|
338 |
-
|
|
339 |
-
|
|
340 |
-
|
|
341 |
-
|
|
342 |
-
|
|
343 |
-
|
|
344 |
-
|
|
345 |
-
|
|
346 |
-
|
|
347 |
-
|
|
348 |
-
|
|
349 |
-
|
|
350 |
-
|
|
351 |
-
|
|
352 |
-
|
|
353 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
354 |
|
355 |
* The bold row denotes the saved checkpoint.
|
356 |
### Framework Versions
|
357 |
- Python: 3.10.12
|
358 |
- SetFit: 1.0.3
|
359 |
-
- Sentence Transformers: 3.0.
|
360 |
- Transformers: 4.39.0
|
361 |
- PyTorch: 2.3.0+cu121
|
362 |
- Datasets: 2.19.2
|
|
|
5 |
- sentence-transformers
|
6 |
- text-classification
|
7 |
- generated_from_setfit_trainer
|
8 |
+
base_model: sentence-transformers/all-MiniLM-L12-v2
|
9 |
metrics:
|
10 |
- accuracy
|
11 |
widget:
|
12 |
+
- text: Could you provide the average temperature, annual rainfall in Paris?
|
13 |
+
- text: Can you provide a summary of the key points discussed about urban development?
|
14 |
+
- text: Compare ces deux documents
|
15 |
+
- text: What are the steps required to apply for a passport?
|
16 |
+
- text: What is the basic definition of seismic design?
|
|
|
|
|
|
|
17 |
pipeline_tag: text-classification
|
18 |
inference: true
|
19 |
model-index:
|
20 |
+
- name: SetFit with sentence-transformers/all-MiniLM-L12-v2
|
21 |
results:
|
22 |
- task:
|
23 |
type: text-classification
|
|
|
28 |
split: test
|
29 |
metrics:
|
30 |
- type: accuracy
|
31 |
+
value: 0.7333333333333333
|
32 |
name: Accuracy
|
33 |
---
|
34 |
|
35 |
+
# SetFit with sentence-transformers/all-MiniLM-L12-v2
|
36 |
|
37 |
+
This is a [SetFit](https://github.com/huggingface/setfit) model that can be used for Text Classification. This SetFit model uses [sentence-transformers/all-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2) as the Sentence Transformer embedding model. A [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance is used for classification.
|
38 |
|
39 |
The model has been trained using an efficient few-shot learning technique that involves:
|
40 |
|
|
|
45 |
|
46 |
### Model Description
|
47 |
- **Model Type:** SetFit
|
48 |
+
- **Sentence Transformer body:** [sentence-transformers/all-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2)
|
49 |
- **Classification head:** a [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance
|
50 |
+
- **Maximum Sequence Length:** 128 tokens
|
51 |
+
- **Number of Classes:** 5 classes
|
52 |
<!-- - **Training Dataset:** [Unknown](https://huggingface.co/datasets/unknown) -->
|
53 |
<!-- - **Language:** Unknown -->
|
54 |
<!-- - **License:** Unknown -->
|
|
|
60 |
- **Blogpost:** [SetFit: Efficient Few-Shot Learning Without Prompts](https://huggingface.co/blog/setfit)
|
61 |
|
62 |
### Model Labels
|
63 |
+
| Label | Examples |
|
64 |
+
|:-----------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
65 |
+
| sub_queries | <ul><li>'How can I use 3D print to build a bridge and how much would it be?'</li><li>'Pourriez-vous détailler les critères spécifiques utilisés pour évaluer la durabilité des matériaux de construction, les types de systèmes HVAC les plus efficaces actuellement en usage dans les bâtiments verts, et les différentes méthodes employées pour réduire les déchets pendant la phase de construction ?'</li><li>'Comment faire une etude de marche? Quelles sont les meilleures sources?'</li></ul> |
|
66 |
+
| summary | <ul><li>'Quelles informations primordiales me conseillez-vous de mémoriser de ce document'</li><li>'Quels sont les points principaux à retenir'</li><li>'What is the primary theme of the document ?'</li></ul> |
|
67 |
+
| exchange | <ul><li>'Pourriez-vous me fournir un résumé des points clés abordés dans notre discussion précédente ?'</li><li>'Quels sont les points clés abordés dans notre discussion précédente ?'</li><li>'Could you restate the main points discussed about acoustic engineering?'</li></ul> |
|
68 |
+
| simple_questions | <ul><li>'Quelle est le principal moteur de la croissance économique ? Fais un post linkedin sur le sujet'</li><li>'Pourriez-vous résumer les bénéfices que les utilisateurs peuvent tirer des récentes avancées en matériel informatique ?'</li><li>'What is the purpose of environmental impact assessments?'</li></ul> |
|
69 |
+
| compare | <ul><li>'Compare the methodologies'</li><li>'Compare the nutritional information provided on these food labels'</li><li>'Analysez comment la structure narrative de ces manuscrits influence leur message'</li></ul> |
|
70 |
|
71 |
## Evaluation
|
72 |
|
73 |
### Metrics
|
74 |
| Label | Accuracy |
|
75 |
|:--------|:---------|
|
76 |
+
| **all** | 0.7333 |
|
77 |
|
78 |
## Uses
|
79 |
|
|
|
93 |
# Download from the 🤗 Hub
|
94 |
model = SetFitModel.from_pretrained("egis-group/router_mini_lm_l6")
|
95 |
# Run inference
|
96 |
+
preds = model("Compare ces deux documents")
|
97 |
```
|
98 |
|
99 |
<!--
|
|
|
125 |
### Training Set Metrics
|
126 |
| Training set | Min | Median | Max |
|
127 |
|:-------------|:----|:--------|:----|
|
128 |
+
| Word count | 3 | 13.4636 | 48 |
|
129 |
|
130 |
| Label | Training Sample Count |
|
131 |
|:---------|:----------------------|
|
|
|
152 |
### Training Results
|
153 |
| Epoch | Step | Training Loss | Validation Loss |
|
154 |
|:-------:|:--------:|:-------------:|:---------------:|
|
155 |
+
| 0.0003 | 1 | 0.3239 | - |
|
156 |
+
| 0.0152 | 50 | 0.3443 | - |
|
157 |
+
| 0.0304 | 100 | 0.2282 | - |
|
158 |
+
| 0.0456 | 150 | 0.2576 | - |
|
159 |
+
| 0.0608 | 200 | 0.2587 | - |
|
160 |
+
| 0.0760 | 250 | 0.1747 | - |
|
161 |
+
| 0.0912 | 300 | 0.1916 | - |
|
162 |
+
| 0.1064 | 350 | 0.1638 | - |
|
163 |
+
| 0.1216 | 400 | 0.1459 | - |
|
164 |
+
| 0.1368 | 450 | 0.1322 | - |
|
165 |
+
| 0.1520 | 500 | 0.038 | - |
|
166 |
+
| 0.1672 | 550 | 0.0636 | - |
|
167 |
+
| 0.1824 | 600 | 0.0613 | - |
|
168 |
+
| 0.1976 | 650 | 0.0322 | - |
|
169 |
+
| 0.2128 | 700 | 0.0159 | - |
|
170 |
+
| 0.2280 | 750 | 0.0029 | - |
|
171 |
+
| 0.2432 | 800 | 0.0012 | - |
|
172 |
+
| 0.2584 | 850 | 0.0019 | - |
|
173 |
+
| 0.2736 | 900 | 0.0025 | - |
|
174 |
+
| 0.2888 | 950 | 0.0028 | - |
|
175 |
+
| 0.3040 | 1000 | 0.001 | - |
|
176 |
+
| 0.3192 | 1050 | 0.0014 | - |
|
177 |
+
| 0.3344 | 1100 | 0.0007 | - |
|
178 |
+
| 0.3497 | 1150 | 0.001 | - |
|
179 |
+
| 0.3649 | 1200 | 0.0014 | - |
|
180 |
+
| 0.3801 | 1250 | 0.0003 | - |
|
181 |
+
| 0.3953 | 1300 | 0.0005 | - |
|
182 |
+
| 0.4105 | 1350 | 0.0003 | - |
|
183 |
+
| 0.4257 | 1400 | 0.0004 | - |
|
184 |
+
| 0.4409 | 1450 | 0.0003 | - |
|
185 |
+
| 0.4561 | 1500 | 0.0004 | - |
|
186 |
+
| 0.4713 | 1550 | 0.0003 | - |
|
187 |
+
| 0.4865 | 1600 | 0.0002 | - |
|
188 |
+
| 0.5017 | 1650 | 0.0004 | - |
|
189 |
+
| 0.5169 | 1700 | 0.0003 | - |
|
190 |
+
| 0.5321 | 1750 | 0.0003 | - |
|
191 |
+
| 0.5473 | 1800 | 0.0004 | - |
|
192 |
+
| 0.5625 | 1850 | 0.0002 | - |
|
193 |
+
| 0.5777 | 1900 | 0.0001 | - |
|
194 |
+
| 0.5929 | 1950 | 0.0001 | - |
|
195 |
+
| 0.6081 | 2000 | 0.0003 | - |
|
196 |
+
| 0.6233 | 2050 | 0.0002 | - |
|
197 |
+
| 0.6385 | 2100 | 0.0001 | - |
|
198 |
+
| 0.6537 | 2150 | 0.0002 | - |
|
199 |
+
| 0.6689 | 2200 | 0.0002 | - |
|
200 |
+
| 0.6841 | 2250 | 0.0001 | - |
|
201 |
+
| 0.6993 | 2300 | 0.0002 | - |
|
202 |
+
| 0.7145 | 2350 | 0.0003 | - |
|
203 |
+
| 0.7297 | 2400 | 0.0002 | - |
|
204 |
+
| 0.7449 | 2450 | 0.0002 | - |
|
205 |
+
| 0.7601 | 2500 | 0.0001 | - |
|
206 |
+
| 0.7753 | 2550 | 0.0002 | - |
|
207 |
+
| 0.7905 | 2600 | 0.0001 | - |
|
208 |
+
| 0.8057 | 2650 | 0.0001 | - |
|
209 |
+
| 0.8209 | 2700 | 0.0001 | - |
|
210 |
+
| 0.8361 | 2750 | 0.0001 | - |
|
211 |
+
| 0.8513 | 2800 | 0.0001 | - |
|
212 |
+
| 0.8665 | 2850 | 0.0001 | - |
|
213 |
+
| 0.8817 | 2900 | 0.0001 | - |
|
214 |
+
| 0.8969 | 2950 | 0.0001 | - |
|
215 |
+
| 0.9121 | 3000 | 0.0001 | - |
|
216 |
+
| 0.9273 | 3050 | 0.0001 | - |
|
217 |
+
| 0.9425 | 3100 | 0.0001 | - |
|
218 |
+
| 0.9577 | 3150 | 0.0001 | - |
|
219 |
+
| 0.9729 | 3200 | 0.0001 | - |
|
220 |
+
| 0.9881 | 3250 | 0.0001 | - |
|
221 |
+
| 1.0 | 3289 | - | 0.0982 |
|
222 |
+
| 1.0033 | 3300 | 0.0001 | - |
|
223 |
+
| 1.0185 | 3350 | 0.0001 | - |
|
224 |
+
| 1.0337 | 3400 | 0.0001 | - |
|
225 |
+
| 1.0490 | 3450 | 0.0001 | - |
|
226 |
+
| 1.0642 | 3500 | 0.0001 | - |
|
227 |
+
| 1.0794 | 3550 | 0.0249 | - |
|
228 |
+
| 1.0946 | 3600 | 0.0002 | - |
|
229 |
+
| 1.1098 | 3650 | 0.0001 | - |
|
230 |
+
| 1.1250 | 3700 | 0.0001 | - |
|
231 |
+
| 1.1402 | 3750 | 0.0001 | - |
|
232 |
+
| 1.1554 | 3800 | 0.0001 | - |
|
233 |
+
| 1.1706 | 3850 | 0.0001 | - |
|
234 |
+
| 1.1858 | 3900 | 0.0001 | - |
|
235 |
+
| 1.2010 | 3950 | 0.0001 | - |
|
236 |
+
| 1.2162 | 4000 | 0.0001 | - |
|
237 |
+
| 1.2314 | 4050 | 0.0 | - |
|
238 |
+
| 1.2466 | 4100 | 0.0001 | - |
|
239 |
+
| 1.2618 | 4150 | 0.0 | - |
|
240 |
+
| 1.2770 | 4200 | 0.0001 | - |
|
241 |
+
| 1.2922 | 4250 | 0.0 | - |
|
242 |
+
| 1.3074 | 4300 | 0.0001 | - |
|
243 |
+
| 1.3226 | 4350 | 0.0001 | - |
|
244 |
+
| 1.3378 | 4400 | 0.0001 | - |
|
245 |
+
| 1.3530 | 4450 | 0.0001 | - |
|
246 |
+
| 1.3682 | 4500 | 0.0001 | - |
|
247 |
+
| 1.3834 | 4550 | 0.0001 | - |
|
248 |
+
| 1.3986 | 4600 | 0.0001 | - |
|
249 |
+
| 1.4138 | 4650 | 0.0001 | - |
|
250 |
+
| 1.4290 | 4700 | 0.0001 | - |
|
251 |
+
| 1.4442 | 4750 | 0.0001 | - |
|
252 |
+
| 1.4594 | 4800 | 0.0001 | - |
|
253 |
+
| 1.4746 | 4850 | 0.0001 | - |
|
254 |
+
| 1.4898 | 4900 | 0.0 | - |
|
255 |
+
| 1.5050 | 4950 | 0.0 | - |
|
256 |
+
| 1.5202 | 5000 | 0.0 | - |
|
257 |
+
| 1.5354 | 5050 | 0.0 | - |
|
258 |
+
| 1.5506 | 5100 | 0.0 | - |
|
259 |
+
| 1.5658 | 5150 | 0.0001 | - |
|
260 |
+
| 1.5810 | 5200 | 0.0001 | - |
|
261 |
+
| 1.5962 | 5250 | 0.0 | - |
|
262 |
+
| 1.6114 | 5300 | 0.0 | - |
|
263 |
+
| 1.6266 | 5350 | 0.0001 | - |
|
264 |
+
| 1.6418 | 5400 | 0.0001 | - |
|
265 |
+
| 1.6570 | 5450 | 0.0 | - |
|
266 |
+
| 1.6722 | 5500 | 0.0001 | - |
|
267 |
+
| 1.6874 | 5550 | 0.0 | - |
|
268 |
+
| 1.7026 | 5600 | 0.0001 | - |
|
269 |
+
| 1.7178 | 5650 | 0.0 | - |
|
270 |
+
| 1.7330 | 5700 | 0.0001 | - |
|
271 |
+
| 1.7483 | 5750 | 0.0001 | - |
|
272 |
+
| 1.7635 | 5800 | 0.0001 | - |
|
273 |
+
| 1.7787 | 5850 | 0.0001 | - |
|
274 |
+
| 1.7939 | 5900 | 0.0 | - |
|
275 |
+
| 1.8091 | 5950 | 0.0001 | - |
|
276 |
+
| 1.8243 | 6000 | 0.0001 | - |
|
277 |
+
| 1.8395 | 6050 | 0.0 | - |
|
278 |
+
| 1.8547 | 6100 | 0.0001 | - |
|
279 |
+
| 1.8699 | 6150 | 0.0 | - |
|
280 |
+
| 1.8851 | 6200 | 0.0 | - |
|
281 |
+
| 1.9003 | 6250 | 0.0 | - |
|
282 |
+
| 1.9155 | 6300 | 0.0 | - |
|
283 |
+
| 1.9307 | 6350 | 0.0001 | - |
|
284 |
+
| 1.9459 | 6400 | 0.0 | - |
|
285 |
+
| 1.9611 | 6450 | 0.0 | - |
|
286 |
+
| 1.9763 | 6500 | 0.0001 | - |
|
287 |
+
| 1.9915 | 6550 | 0.0 | - |
|
288 |
+
| **2.0** | **6578** | **-** | **0.0939** |
|
289 |
+
| 2.0067 | 6600 | 0.0001 | - |
|
290 |
+
| 2.0219 | 6650 | 0.0001 | - |
|
291 |
+
| 2.0371 | 6700 | 0.0001 | - |
|
292 |
+
| 2.0523 | 6750 | 0.0001 | - |
|
293 |
+
| 2.0675 | 6800 | 0.0 | - |
|
294 |
+
| 2.0827 | 6850 | 0.0 | - |
|
295 |
+
| 2.0979 | 6900 | 0.0 | - |
|
296 |
+
| 2.1131 | 6950 | 0.0 | - |
|
297 |
+
| 2.1283 | 7000 | 0.0001 | - |
|
298 |
+
| 2.1435 | 7050 | 0.0001 | - |
|
299 |
+
| 2.1587 | 7100 | 0.0 | - |
|
300 |
+
| 2.1739 | 7150 | 0.0 | - |
|
301 |
+
| 2.1891 | 7200 | 0.0001 | - |
|
302 |
+
| 2.2043 | 7250 | 0.0001 | - |
|
303 |
+
| 2.2195 | 7300 | 0.0 | - |
|
304 |
+
| 2.2347 | 7350 | 0.0 | - |
|
305 |
+
| 2.2499 | 7400 | 0.0 | - |
|
306 |
+
| 2.2651 | 7450 | 0.0 | - |
|
307 |
+
| 2.2803 | 7500 | 0.0 | - |
|
308 |
+
| 2.2955 | 7550 | 0.0001 | - |
|
309 |
+
| 2.3107 | 7600 | 0.0 | - |
|
310 |
+
| 2.3259 | 7650 | 0.0001 | - |
|
311 |
+
| 2.3411 | 7700 | 0.0 | - |
|
312 |
+
| 2.3563 | 7750 | 0.0001 | - |
|
313 |
+
| 2.3715 | 7800 | 0.0 | - |
|
314 |
+
| 2.3867 | 7850 | 0.0001 | - |
|
315 |
+
| 2.4019 | 7900 | 0.0 | - |
|
316 |
+
| 2.4171 | 7950 | 0.0 | - |
|
317 |
+
| 2.4324 | 8000 | 0.0 | - |
|
318 |
+
| 2.4476 | 8050 | 0.0001 | - |
|
319 |
+
| 2.4628 | 8100 | 0.0001 | - |
|
320 |
+
| 2.4780 | 8150 | 0.0 | - |
|
321 |
+
| 2.4932 | 8200 | 0.0001 | - |
|
322 |
+
| 2.5084 | 8250 | 0.0001 | - |
|
323 |
+
| 2.5236 | 8300 | 0.0001 | - |
|
324 |
+
| 2.5388 | 8350 | 0.0 | - |
|
325 |
+
| 2.5540 | 8400 | 0.0 | - |
|
326 |
+
| 2.5692 | 8450 | 0.0 | - |
|
327 |
+
| 2.5844 | 8500 | 0.0 | - |
|
328 |
+
| 2.5996 | 8550 | 0.0 | - |
|
329 |
+
| 2.6148 | 8600 | 0.0 | - |
|
330 |
+
| 2.6300 | 8650 | 0.0 | - |
|
331 |
+
| 2.6452 | 8700 | 0.0 | - |
|
332 |
+
| 2.6604 | 8750 | 0.0 | - |
|
333 |
+
| 2.6756 | 8800 | 0.0 | - |
|
334 |
+
| 2.6908 | 8850 | 0.0 | - |
|
335 |
+
| 2.7060 | 8900 | 0.0001 | - |
|
336 |
+
| 2.7212 | 8950 | 0.0 | - |
|
337 |
+
| 2.7364 | 9000 | 0.0 | - |
|
338 |
+
| 2.7516 | 9050 | 0.0001 | - |
|
339 |
+
| 2.7668 | 9100 | 0.0 | - |
|
340 |
+
| 2.7820 | 9150 | 0.0 | - |
|
341 |
+
| 2.7972 | 9200 | 0.0 | - |
|
342 |
+
| 2.8124 | 9250 | 0.0 | - |
|
343 |
+
| 2.8276 | 9300 | 0.0 | - |
|
344 |
+
| 2.8428 | 9350 | 0.0 | - |
|
345 |
+
| 2.8580 | 9400 | 0.0 | - |
|
346 |
+
| 2.8732 | 9450 | 0.0 | - |
|
347 |
+
| 2.8884 | 9500 | 0.0 | - |
|
348 |
+
| 2.9036 | 9550 | 0.0 | - |
|
349 |
+
| 2.9188 | 9600 | 0.0 | - |
|
350 |
+
| 2.9340 | 9650 | 0.0 | - |
|
351 |
+
| 2.9492 | 9700 | 0.0 | - |
|
352 |
+
| 2.9644 | 9750 | 0.0 | - |
|
353 |
+
| 2.9796 | 9800 | 0.0 | - |
|
354 |
+
| 2.9948 | 9850 | 0.0 | - |
|
355 |
+
| 3.0 | 9867 | - | 0.0951 |
|
356 |
+
| 3.0100 | 9900 | 0.0 | - |
|
357 |
+
| 3.0252 | 9950 | 0.0 | - |
|
358 |
+
| 3.0404 | 10000 | 0.0 | - |
|
359 |
+
| 3.0556 | 10050 | 0.0 | - |
|
360 |
+
| 3.0708 | 10100 | 0.0 | - |
|
361 |
+
| 3.0860 | 10150 | 0.0 | - |
|
362 |
+
| 3.1012 | 10200 | 0.0 | - |
|
363 |
+
| 3.1164 | 10250 | 0.0 | - |
|
364 |
+
| 3.1317 | 10300 | 0.0 | - |
|
365 |
+
| 3.1469 | 10350 | 0.0 | - |
|
366 |
+
| 3.1621 | 10400 | 0.0 | - |
|
367 |
+
| 3.1773 | 10450 | 0.0001 | - |
|
368 |
+
| 3.1925 | 10500 | 0.0 | - |
|
369 |
+
| 3.2077 | 10550 | 0.0 | - |
|
370 |
+
| 3.2229 | 10600 | 0.0 | - |
|
371 |
+
| 3.2381 | 10650 | 0.0 | - |
|
372 |
+
| 3.2533 | 10700 | 0.0 | - |
|
373 |
+
| 3.2685 | 10750 | 0.0 | - |
|
374 |
+
| 3.2837 | 10800 | 0.0 | - |
|
375 |
+
| 3.2989 | 10850 | 0.0 | - |
|
376 |
+
| 3.3141 | 10900 | 0.0 | - |
|
377 |
+
| 3.3293 | 10950 | 0.0 | - |
|
378 |
+
| 3.3445 | 11000 | 0.0 | - |
|
379 |
+
| 3.3597 | 11050 | 0.0 | - |
|
380 |
+
| 3.3749 | 11100 | 0.0 | - |
|
381 |
+
| 3.3901 | 11150 | 0.0 | - |
|
382 |
+
| 3.4053 | 11200 | 0.0 | - |
|
383 |
+
| 3.4205 | 11250 | 0.0 | - |
|
384 |
+
| 3.4357 | 11300 | 0.0 | - |
|
385 |
+
| 3.4509 | 11350 | 0.0 | - |
|
386 |
+
| 3.4661 | 11400 | 0.0 | - |
|
387 |
+
| 3.4813 | 11450 | 0.0 | - |
|
388 |
+
| 3.4965 | 11500 | 0.0 | - |
|
389 |
+
| 3.5117 | 11550 | 0.0 | - |
|
390 |
+
| 3.5269 | 11600 | 0.0 | - |
|
391 |
+
| 3.5421 | 11650 | 0.0 | - |
|
392 |
+
| 3.5573 | 11700 | 0.0 | - |
|
393 |
+
| 3.5725 | 11750 | 0.0 | - |
|
394 |
+
| 3.5877 | 11800 | 0.0 | - |
|
395 |
+
| 3.6029 | 11850 | 0.0 | - |
|
396 |
+
| 3.6181 | 11900 | 0.0 | - |
|
397 |
+
| 3.6333 | 11950 | 0.0 | - |
|
398 |
+
| 3.6485 | 12000 | 0.0 | - |
|
399 |
+
| 3.6637 | 12050 | 0.0 | - |
|
400 |
+
| 3.6789 | 12100 | 0.0 | - |
|
401 |
+
| 3.6941 | 12150 | 0.0 | - |
|
402 |
+
| 3.7093 | 12200 | 0.0 | - |
|
403 |
+
| 3.7245 | 12250 | 0.0 | - |
|
404 |
+
| 3.7397 | 12300 | 0.0 | - |
|
405 |
+
| 3.7549 | 12350 | 0.0 | - |
|
406 |
+
| 3.7701 | 12400 | 0.0 | - |
|
407 |
+
| 3.7853 | 12450 | 0.0 | - |
|
408 |
+
| 3.8005 | 12500 | 0.0 | - |
|
409 |
+
| 3.8157 | 12550 | 0.0 | - |
|
410 |
+
| 3.8310 | 12600 | 0.0 | - |
|
411 |
+
| 3.8462 | 12650 | 0.0 | - |
|
412 |
+
| 3.8614 | 12700 | 0.0 | - |
|
413 |
+
| 3.8766 | 12750 | 0.0 | - |
|
414 |
+
| 3.8918 | 12800 | 0.0 | - |
|
415 |
+
| 3.9070 | 12850 | 0.0 | - |
|
416 |
+
| 3.9222 | 12900 | 0.0 | - |
|
417 |
+
| 3.9374 | 12950 | 0.0 | - |
|
418 |
+
| 3.9526 | 13000 | 0.0 | - |
|
419 |
+
| 3.9678 | 13050 | 0.0 | - |
|
420 |
+
| 3.9830 | 13100 | 0.0 | - |
|
421 |
+
| 3.9982 | 13150 | 0.0 | - |
|
422 |
+
| 4.0 | 13156 | - | 0.0954 |
|
423 |
|
424 |
* The bold row denotes the saved checkpoint.
|
425 |
### Framework Versions
|
426 |
- Python: 3.10.12
|
427 |
- SetFit: 1.0.3
|
428 |
+
- Sentence Transformers: 3.0.1
|
429 |
- Transformers: 4.39.0
|
430 |
- PyTorch: 2.3.0+cu121
|
431 |
- Datasets: 2.19.2
|
config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "checkpoints/
|
3 |
"architectures": [
|
4 |
"BertModel"
|
5 |
],
|
@@ -15,7 +15,7 @@
|
|
15 |
"max_position_embeddings": 512,
|
16 |
"model_type": "bert",
|
17 |
"num_attention_heads": 12,
|
18 |
-
"num_hidden_layers":
|
19 |
"pad_token_id": 0,
|
20 |
"position_embedding_type": "absolute",
|
21 |
"torch_dtype": "float32",
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "checkpoints/step_6578",
|
3 |
"architectures": [
|
4 |
"BertModel"
|
5 |
],
|
|
|
15 |
"max_position_embeddings": 512,
|
16 |
"model_type": "bert",
|
17 |
"num_attention_heads": 12,
|
18 |
+
"num_hidden_layers": 12,
|
19 |
"pad_token_id": 0,
|
20 |
"position_embedding_type": "absolute",
|
21 |
"torch_dtype": "float32",
|
config_sentence_transformers.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"__version__": {
|
3 |
-
"sentence_transformers": "
|
4 |
-
"transformers": "4.
|
5 |
-
"pytorch": "
|
6 |
},
|
7 |
"prompts": {},
|
8 |
"default_prompt_name": null,
|
|
|
1 |
{
|
2 |
"__version__": {
|
3 |
+
"sentence_transformers": "3.0.1",
|
4 |
+
"transformers": "4.39.0",
|
5 |
+
"pytorch": "2.3.0+cu121"
|
6 |
},
|
7 |
"prompts": {},
|
8 |
"default_prompt_name": null,
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:04599198421431c2dcf78a32aeca35ab0c7bc44059bf43fe0cd036255eb230d0
|
3 |
+
size 133462128
|
model_head.pkl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ed908d757b1282930b7cdca8ecc4ba64665f481175290d63979d3f7c9d2ed9e2
|
3 |
+
size 16559
|
sentence_bert_config.json
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
{
|
2 |
-
"max_seq_length":
|
3 |
"do_lower_case": false
|
4 |
}
|
|
|
1 |
{
|
2 |
+
"max_seq_length": 128,
|
3 |
"do_lower_case": false
|
4 |
}
|
tokenizer.json
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
"version": "1.0",
|
3 |
"truncation": {
|
4 |
"direction": "Right",
|
5 |
-
"max_length":
|
6 |
"strategy": "LongestFirst",
|
7 |
"stride": 0
|
8 |
},
|
|
|
2 |
"version": "1.0",
|
3 |
"truncation": {
|
4 |
"direction": "Right",
|
5 |
+
"max_length": 128,
|
6 |
"strategy": "LongestFirst",
|
7 |
"stride": 0
|
8 |
},
|
tokenizer_config.json
CHANGED
@@ -47,7 +47,7 @@
|
|
47 |
"do_lower_case": true,
|
48 |
"mask_token": "[MASK]",
|
49 |
"max_length": 128,
|
50 |
-
"model_max_length":
|
51 |
"never_split": null,
|
52 |
"pad_to_multiple_of": null,
|
53 |
"pad_token": "[PAD]",
|
|
|
47 |
"do_lower_case": true,
|
48 |
"mask_token": "[MASK]",
|
49 |
"max_length": 128,
|
50 |
+
"model_max_length": 128,
|
51 |
"never_split": null,
|
52 |
"pad_to_multiple_of": null,
|
53 |
"pad_token": "[PAD]",
|