bigint
committed on
Commit
•
c0fd5ed
1
Parent(s):
cef83d4
chore: update readme
Browse files
README.md
CHANGED
@@ -1,100 +1,23 @@
|
|
1 |
---
|
2 |
language: en
|
3 |
widget:
|
4 |
-
- text: It is great to see athletes promoting awareness for climate change.
|
5 |
datasets:
|
6 |
-
- cardiffnlp/tweet_topic_multi
|
7 |
license: mit
|
8 |
metrics:
|
9 |
-
- f1
|
10 |
-
- accuracy
|
11 |
pipeline_tag: text-classification
|
12 |
---
|
13 |
|
14 |
-
#
|
15 |
|
16 |
-
|
17 |
|
18 |
-
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
|
24 |
-
|-----------------------------|---------------------|----------------------------|--------------------------|
|
25 |
-
| 1: business_&_entrepreneurs | 6: film_tv_&_video | 11: music | 16: sports |
|
26 |
-
| 2: celebrity_&_pop_culture | 7: fitness_&_health | 12: news_&_social_concern | 17: travel_&_adventure |
|
27 |
-
| 3: diaries_&_daily_life | 8: food_&_dining | 13: other_hobbies | 18: youth_&_student_life |
|
28 |
-
| 4: family | 9: gaming | 14: relationships | |
|
29 |
-
|
30 |
-
|
31 |
-
## Full classification example
|
32 |
-
|
33 |
-
```python
|
34 |
-
from transformers import AutoModelForSequenceClassification, TFAutoModelForSequenceClassification
|
35 |
-
from transformers import AutoTokenizer
|
36 |
-
import numpy as np
|
37 |
-
from scipy.special import expit
|
38 |
-
|
39 |
-
|
40 |
-
MODEL = f"cardiffnlp/tweet-topic-21-multi"
|
41 |
-
tokenizer = AutoTokenizer.from_pretrained(MODEL)
|
42 |
-
|
43 |
-
# PT
|
44 |
-
model = AutoModelForSequenceClassification.from_pretrained(MODEL)
|
45 |
-
class_mapping = model.config.id2label
|
46 |
-
|
47 |
-
text = "It is great to see athletes promoting awareness for climate change."
|
48 |
-
tokens = tokenizer(text, return_tensors='pt')
|
49 |
-
output = model(**tokens)
|
50 |
-
|
51 |
-
scores = output[0][0].detach().numpy()
|
52 |
-
scores = expit(scores)
|
53 |
-
predictions = (scores >= 0.5) * 1
|
54 |
-
|
55 |
-
|
56 |
-
# TF
|
57 |
-
#tf_model = TFAutoModelForSequenceClassification.from_pretrained(MODEL)
|
58 |
-
#class_mapping = tf_model.config.id2label
|
59 |
-
#text = "It is great to see athletes promoting awareness for climate change."
|
60 |
-
#tokens = tokenizer(text, return_tensors='tf')
|
61 |
-
#output = tf_model(**tokens)
|
62 |
-
#scores = output[0][0]
|
63 |
-
#scores = expit(scores)
|
64 |
-
#predictions = (scores >= 0.5) * 1
|
65 |
-
|
66 |
-
# Map to classes
|
67 |
-
for i in range(len(predictions)):
|
68 |
-
if predictions[i]:
|
69 |
-
print(class_mapping[i])
|
70 |
-
|
71 |
-
```
|
72 |
-
Output:
|
73 |
-
|
74 |
-
```
|
75 |
-
news_&_social_concern
|
76 |
-
sports
|
77 |
-
```
|
78 |
-
|
79 |
-
### BibTeX entry and citation info
|
80 |
-
|
81 |
-
Please cite the [reference paper](https://aclanthology.org/2022.coling-1.299/) if you use this model.
|
82 |
-
|
83 |
-
```bibtex
|
84 |
-
@inproceedings{antypas-etal-2022-twitter,
|
85 |
-
title = "{T}witter Topic Classification",
|
86 |
-
author = "Antypas, Dimosthenis and
|
87 |
-
Ushio, Asahi and
|
88 |
-
Camacho-Collados, Jose and
|
89 |
-
Silva, Vitor and
|
90 |
-
Neves, Leonardo and
|
91 |
-
Barbieri, Francesco",
|
92 |
-
booktitle = "Proceedings of the 29th International Conference on Computational Linguistics",
|
93 |
-
month = oct,
|
94 |
-
year = "2022",
|
95 |
-
address = "Gyeongju, Republic of Korea",
|
96 |
-
publisher = "International Committee on Computational Linguistics",
|
97 |
-
url = "https://aclanthology.org/2022.coling-1.299",
|
98 |
-
pages = "3386--3400"
|
99 |
-
}
|
100 |
-
```
|
|
|
1 |
---
|
2 |
language: en
|
3 |
widget:
|
4 |
+
- text: It is great to see athletes promoting awareness for climate change.
|
5 |
datasets:
|
6 |
+
- cardiffnlp/tweet_topic_multi
|
7 |
license: mit
|
8 |
metrics:
|
9 |
+
- f1
|
10 |
+
- accuracy
|
11 |
pipeline_tag: text-classification
|
12 |
---
|
13 |
|
14 |
+
# Lenster Tagger
|
15 |
|
16 |
+
<b>Labels</b>:
|
17 |
|
18 |
+
| <span style="font-weight:normal">0: arts\_&_culture</span> | <span style="font-weight:normal">5: fashion\_&_style</span> | <span style="font-weight:normal">10: learning\_&_educational</span> | <span style="font-weight:normal">15: science\_&_technology</span> |
|
19 |
+
| ---------------------------------------------------------- | ----------------------------------------------------------- | ------------------------------------------------------------------- | ----------------------------------------------------------------- |
|
20 |
+
| 1: business\_&_entrepreneurs | 6: film\_tv\_&\_video | 11: music | 16: sports |
|
21 |
+
| 2: celebrity\_&_pop_culture | 7: fitness\_&_health | 12: news\_&_social_concern | 17: travel\_&_adventure |
|
22 |
+
| 3: diaries\_&_daily_life | 8: food\_&_dining | 13: other_hobbies | 18: youth\_&_student_life |
|
23 |
+
| 4: family | 9: gaming | 14: relationships | |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|