Update README.md
Browse files
- README.md +66 -62
- predict.py +0 -5
README.md
CHANGED
@@ -140,74 +140,78 @@ res = classifier("réveille-moi à neuf heures du matin le vendredi")
|
|
140 |
print(res)
|
141 |
```
|
142 |
|
|
|
|
|
|
|
|
|
|
|
|
|
143 |
## Training data
|
144 |
|
145 |
[MASSIVE](https://huggingface.co/datasets/qanastek/MASSIVE) is a parallel dataset of > 1M utterances across 51 languages with annotations for the Natural Language Understanding tasks of intent prediction and slot annotation. Utterances span 60 intents and include 55 slot types. MASSIVE was created by localizing the SLURP dataset, composed of general Intelligent Voice Assistant single-shot interactions.
|
146 |
|
147 |
## Intents
|
148 |
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
music_dislikeness
|
210 |
-
```
|
211 |
|
212 |
## Evaluation results
|
213 |
|
|
|
140 |
print(res)
|
141 |
```
|
142 |
|
143 |
+
Outputs:
|
144 |
+
|
145 |
+
```python
|
146 |
+
[{'label': 'alarm_set', 'score': 0.9998375177383423}]
|
147 |
+
```
|
148 |
+
|
149 |
## Training data
|
150 |
|
151 |
[MASSIVE](https://huggingface.co/datasets/qanastek/MASSIVE) is a parallel dataset of > 1M utterances across 51 languages with annotations for the Natural Language Understanding tasks of intent prediction and slot annotation. Utterances span 60 intents and include 55 slot types. MASSIVE was created by localizing the SLURP dataset, composed of general Intelligent Voice Assistant single-shot interactions.
|
152 |
|
153 |
## Intents
|
154 |
|
155 |
+
* audio_volume_other
|
156 |
+
* play_music
|
157 |
+
* iot_hue_lighton
|
158 |
+
* general_greet
|
159 |
+
* calendar_set
|
160 |
+
* audio_volume_down
|
161 |
+
* social_query
|
162 |
+
* audio_volume_mute
|
163 |
+
* iot_wemo_on
|
164 |
+
* iot_hue_lightup
|
165 |
+
* audio_volume_up
|
166 |
+
* iot_coffee
|
167 |
+
* takeaway_query
|
168 |
+
* qa_maths
|
169 |
+
* play_game
|
170 |
+
* cooking_query
|
171 |
+
* iot_hue_lightdim
|
172 |
+
* iot_wemo_off
|
173 |
+
* music_settings
|
174 |
+
* weather_query
|
175 |
+
* news_query
|
176 |
+
* alarm_remove
|
177 |
+
* social_post
|
178 |
+
* recommendation_events
|
179 |
+
* transport_taxi
|
180 |
+
* takeaway_order
|
181 |
+
* music_query
|
182 |
+
* calendar_query
|
183 |
+
* lists_query
|
184 |
+
* qa_currency
|
185 |
+
* recommendation_movies
|
186 |
+
* general_joke
|
187 |
+
* recommendation_locations
|
188 |
+
* email_querycontact
|
189 |
+
* lists_remove
|
190 |
+
* play_audiobook
|
191 |
+
* email_addcontact
|
192 |
+
* lists_createoradd
|
193 |
+
* play_radio
|
194 |
+
* qa_stock
|
195 |
+
* alarm_query
|
196 |
+
* email_sendemail
|
197 |
+
* general_quirky
|
198 |
+
* music_likeness
|
199 |
+
* cooking_recipe
|
200 |
+
* email_query
|
201 |
+
* datetime_query
|
202 |
+
* transport_traffic
|
203 |
+
* play_podcasts
|
204 |
+
* iot_hue_lightchange
|
205 |
+
* calendar_remove
|
206 |
+
* transport_query
|
207 |
+
* transport_ticket
|
208 |
+
* qa_factoid
|
209 |
+
* iot_cleaning
|
210 |
+
* alarm_set
|
211 |
+
* datetime_convert
|
212 |
+
* iot_hue_lightoff
|
213 |
+
* qa_definition
|
214 |
+
* music_dislikeness
|
|
|
|
|
215 |
|
216 |
## Evaluation results
|
217 |
|
predict.py
CHANGED
@@ -1,7 +1,5 @@
|
|
1 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline
|
2 |
|
3 |
-
classes = ['audio_volume_other', 'play_music', 'iot_hue_lighton', 'general_greet', 'calendar_set', 'audio_volume_down', 'social_query', 'audio_volume_mute', 'iot_wemo_on', 'iot_hue_lightup', 'audio_volume_up', 'iot_coffee', 'takeaway_query', 'qa_maths', 'play_game', 'cooking_query', 'iot_hue_lightdim', 'iot_wemo_off', 'music_settings', 'weather_query', 'news_query', 'alarm_remove', 'social_post', 'recommendation_events', 'transport_taxi', 'takeaway_order', 'music_query', 'calendar_query', 'lists_query', 'qa_currency', 'recommendation_movies', 'general_joke', 'recommendation_locations', 'email_querycontact', 'lists_remove', 'play_audiobook', 'email_addcontact', 'lists_createoradd', 'play_radio', 'qa_stock', 'alarm_query', 'email_sendemail', 'general_quirky', 'music_likeness', 'cooking_recipe', 'email_query', 'datetime_query', 'transport_traffic', 'play_podcasts', 'iot_hue_lightchange', 'calendar_remove', 'transport_query', 'transport_ticket', 'qa_factoid', 'iot_cleaning', 'alarm_set', 'datetime_convert', 'iot_hue_lightoff', 'qa_definition', 'music_dislikeness']
|
4 |
-
|
5 |
model_name = 'qanastek/XLMRoberta-Alexa-Intents-Classification'
|
6 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
7 |
model = AutoModelForSequenceClassification.from_pretrained(model_name)
|
@@ -9,6 +7,3 @@ classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer)
|
|
9 |
|
10 |
res = classifier("réveille-moi à neuf heures du matin le vendredi")
|
11 |
print(res)
|
12 |
-
# idx = int(res[0]["label"].split("_")[-1])
|
13 |
-
# print(idx)
|
14 |
-
# print(classes[idx])
|
|
|
1 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline
|
2 |
|
|
|
|
|
3 |
model_name = 'qanastek/XLMRoberta-Alexa-Intents-Classification'
|
4 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
5 |
model = AutoModelForSequenceClassification.from_pretrained(model_name)
|
|
|
7 |
|
8 |
res = classifier("réveille-moi à neuf heures du matin le vendredi")
|
9 |
print(res)
|
|
|
|
|
|