Spaces:
Build error
Build error
avacaondata
committed on
Commit
•
56a498b
1
Parent(s):
13beaa4
añadidos cambios article
Browse files- app.py +2 -12
- article_app.py +37 -13
app.py
CHANGED
@@ -36,14 +36,6 @@ models = {
|
|
36 |
"IIC/wav2vec2-spanish-multilibrispeech"
|
37 |
),
|
38 |
},
|
39 |
-
# "wav2vec2-jonatangrosman": {
|
40 |
-
# "processor": Wav2Vec2Tokenizer.from_pretrained(
|
41 |
-
# "jonatasgrosman/wav2vec2-large-xlsr-53-spanish"
|
42 |
-
# ),
|
43 |
-
# "model": AutoModelForCTC.from_pretrained(
|
44 |
-
# "jonatasgrosman/wav2vec2-large-xlsr-53-spanish"
|
45 |
-
# ),
|
46 |
-
# },
|
47 |
}
|
48 |
|
49 |
|
@@ -80,7 +72,7 @@ similarity_model = SentenceTransformer(
|
|
80 |
"distiluse-base-multilingual-cased", device="cpu"
|
81 |
)
|
82 |
|
83 |
-
crossencoder = CrossEncoder("
|
84 |
|
85 |
dataset = load_dataset("IIC/spanish_biomedical_crawled_corpus", split="train")
|
86 |
|
@@ -228,7 +220,7 @@ if __name__ == "__main__":
|
|
228 |
step=1,
|
229 |
),
|
230 |
gr.inputs.Dropdown(
|
231 |
-
["wav2vec2-iic"
|
232 |
type="value",
|
233 |
default=None,
|
234 |
label="Select the speech recognition model.",
|
@@ -239,12 +231,10 @@ if __name__ == "__main__":
|
|
239 |
],
|
240 |
outputs=[
|
241 |
gr.outputs.HTML(
|
242 |
-
# type="str",
|
243 |
label="Answer from the system."
|
244 |
),
|
245 |
gr.outputs.Audio(label="Answer in audio"),
|
246 |
],
|
247 |
-
# title="Abstractive QA of BioMedical Domain in Spanish",
|
248 |
description=description,
|
249 |
examples=examples,
|
250 |
theme="grass",
|
|
|
36 |
"IIC/wav2vec2-spanish-multilibrispeech"
|
37 |
),
|
38 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
}
|
40 |
|
41 |
|
|
|
72 |
"distiluse-base-multilingual-cased", device="cpu"
|
73 |
)
|
74 |
|
75 |
+
crossencoder = CrossEncoder("IIC/roberta-base-bne-ranker", device="cpu")
|
76 |
|
77 |
dataset = load_dataset("IIC/spanish_biomedical_crawled_corpus", split="train")
|
78 |
|
|
|
220 |
step=1,
|
221 |
),
|
222 |
gr.inputs.Dropdown(
|
223 |
+
["wav2vec2-iic"],
|
224 |
type="value",
|
225 |
default=None,
|
226 |
label="Select the speech recognition model.",
|
|
|
231 |
],
|
232 |
outputs=[
|
233 |
gr.outputs.HTML(
|
|
|
234 |
label="Answer from the system."
|
235 |
),
|
236 |
gr.outputs.Audio(label="Answer in audio"),
|
237 |
],
|
|
|
238 |
description=description,
|
239 |
examples=examples,
|
240 |
theme="grass",
|
article_app.py
CHANGED
@@ -9,27 +9,27 @@ have been introduced to build this app.
|
|
9 |
The reason for including audio as a possible input and always as an output is because we wanted to make the App much more accessible to people that cannot read or write.
|
10 |
Below you can find all the pieces that form the system.
|
11 |
|
12 |
-
1. <a href="https://
|
13 |
-
2. <a href="https://
|
14 |
that is, the task of getting the most relevant passages to answer a given question with. You can find details about how it was trained on the link attached to the name.
|
15 |
-
3. <a href="https://
|
16 |
-
4. <a href="https://
|
17 |
-
5. <a href="https://
|
18 |
passages and uses them to generate an answer to the question. In the attached link there are more details about how we trained it etc.
|
19 |
|
20 |
On the other hand, we uploaded, and in some cases created, datasets in Spanish to be able to build such a system.
|
21 |
|
22 |
-
1. <a href="https://
|
23 |
-
2. <a href="https://
|
24 |
-
3. <a href="https://
|
25 |
-
4. <a href="https://
|
26 |
-
5. <a href="https://
|
27 |
</p>
|
28 |
"""
|
29 |
-
|
30 |
description = """
|
31 |
<a href="https://www.iic.uam.es/">
|
32 |
-
<img src="https://drive.google.com/uc?export=view&id=
|
33 |
</a>
|
34 |
<h1> BioMedIA: Abstractive Question Answering of BioMedical Domain in Spanish </h1>
|
35 |
Esta aplicación consiste en sistemas de búsqueda del Estado del Arte en Español junto con un modelo generativo entrenado para componer una respuesta a preguntas a partir de una serie de contextos.
|
@@ -49,6 +49,30 @@ examples = [
|
|
49 |
"wav2vec2-iic",
|
50 |
False,
|
51 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
[
|
53 |
"¿Qué alternativas al Paracetamol existen para el dolor de cabeza?",
|
54 |
"vacio.flac",
|
@@ -98,7 +122,7 @@ examples = [
|
|
98 |
False
|
99 |
],
|
100 |
[
|
101 |
-
"¿Qué deficiencia es la causa del síndrome de piernas inquietas
|
102 |
"vacio.flac",
|
103 |
"vacio.flac",
|
104 |
50,
|
|
|
9 |
The reason for including audio as a possible input and always as an output is because we wanted to make the App much more accessible to people that cannot read or write.
|
10 |
Below you can find all the pieces that form the system.
|
11 |
|
12 |
+
1. <a href="https://hf.co/IIC/wav2vec2-spanish-multilibrispeech">Speech2Text</a>: For this we fine-tuned a multilingual Wav2Vec2, as explained in the attached link. We use this model to process audio questions.
|
13 |
+
2. <a href="https://hf.co/IIC/dpr-spanish-passage_encoder-allqa-base">Dense Passage Retrieval for Context</a>: Dense Passage Retrieval is a methodology <a href="https://arxiv.org/abs/2004.04906">developed by Facebook</a> which is currently the SoTA for Passage Retrieval,
|
14 |
that is, the task of getting the most relevant passages to answer a given question with. You can find details about how it was trained on the link attached to the name.
|
15 |
+
3. <a href="https://hf.co/IIC/dpr-spanish-question_encoder-allqa-base">Dense Passage Retrieval for Question</a>: It is actually part of the same thing as the above. For more details, go to the attached link.
|
16 |
+
4. <a href="https://hf.co/sentence-transformers/distiluse-base-multilingual-cased-v1">Sentence Encoder Ranker</a>: To rerank the candidate contexts retrieved by DPR for the generative model to see. This also selects the top 5 passages for the model to read; it is the final filter before the generative model.
|
17 |
+
5. <a href="https://hf.co/IIC/mt5-base-lfqa-es">Generative Long-Form Question Answering Model</a>: For this we used either mT5 (the one attached) or <a href="https://hf.co/IIC/mbart-large-lfqa-es">mBART</a>. This generative model receives the most relevant
|
18 |
passages and uses them to generate an answer to the question. In the attached link there are more details about how we trained it etc.
|
19 |
|
20 |
On the other hand, we uploaded, and in some cases created, datasets in Spanish to be able to build such a system.
|
21 |
|
22 |
+
1. <a href="https://hf.co/datasets/IIC/spanish_biomedical_crawled_corpus">Spanish Biomedical Crawled Corpus</a>. Used for finding answers to questions about biomedicine. (More info in the link.)
|
23 |
+
2. <a href="https://hf.co/datasets/IIC/lfqa_spanish">LFQA_Spanish</a>. Used for training the generative model. (More info in the link.)
|
24 |
+
3. <a href="https://hf.co/datasets/squad_es">SQUADES</a>. Used to train the DPR models. (More info in the link.)
|
25 |
+
4. <a href="https://hf.co/datasets/IIC/bioasq22_es">BioAsq22-Spanish</a>. Used to train the DPR models. (More info in the link.)
|
26 |
+
5. <a href="https://hf.co/datasets/PlanTL-GOB-ES/SQAC">SQAC (Spanish Question Answering Corpus)</a>. Used to train the DPR models. (More info in the link.)
|
27 |
</p>
|
28 |
"""
|
29 |
+
|
30 |
description = """
|
31 |
<a href="https://www.iic.uam.es/">
|
32 |
+
<img src="https://drive.google.com/uc?export=view&id=1kvHDFUPPnf1kM5EKlv5Ife2KcZZvva_1" style="max-width: 100%; max-height: 10%; height: 250px; object-fit: fill">
|
33 |
</a>
|
34 |
<h1> BioMedIA: Abstractive Question Answering of BioMedical Domain in Spanish </h1>
|
35 |
Esta aplicación consiste en sistemas de búsqueda del Estado del Arte en Español junto con un modelo generativo entrenado para componer una respuesta a preguntas a partir de una serie de contextos.
|
|
|
49 |
"wav2vec2-iic",
|
50 |
False,
|
51 |
],
|
52 |
+
[
|
53 |
+
"¿Por qué sentimos ansiedad?",
|
54 |
+
"vacio.flac",
|
55 |
+
"vacio.flac",
|
56 |
+
50,
|
57 |
+
8,
|
58 |
+
3,
|
59 |
+
1.0,
|
60 |
+
250,
|
61 |
+
"wav2vec2-iic",
|
62 |
+
False,
|
63 |
+
],
|
64 |
+
[
|
65 |
+
"¿Qué es la mesoterapia?",
|
66 |
+
"vacio.flac",
|
67 |
+
"vacio.flac",
|
68 |
+
50,
|
69 |
+
8,
|
70 |
+
3,
|
71 |
+
1.0,
|
72 |
+
250,
|
73 |
+
"wav2vec2-iic",
|
74 |
+
False,
|
75 |
+
],
|
76 |
[
|
77 |
"¿Qué alternativas al Paracetamol existen para el dolor de cabeza?",
|
78 |
"vacio.flac",
|
|
|
122 |
False
|
123 |
],
|
124 |
[
|
125 |
+
"¿Qué deficiencia es la causa del síndrome de piernas inquietas?",
|
126 |
"vacio.flac",
|
127 |
"vacio.flac",
|
128 |
50,
|