Victoria Slocum committed on
Commit
d04bf10
1 Parent(s): c0dee52

Feat: Add model change

Browse files
Files changed (2) hide show
  1. app.py +47 -27
  2. requirements.txt +91 -5
app.py CHANGED
@@ -4,17 +4,30 @@ import random
4
  from spacy.tokens import Span
5
  import gradio as gr
6
 
7
- DEFAULT_MODEL = "en_core_web_sm"
8
  DEFAULT_TEXT = "David Bowie moved to the US in 1974, initially staying in New York City before settling in Los Angeles."
9
  DEFAULT_TOK_ATTR = ['idx', 'text', 'pos_', 'lemma_', 'shape_', 'dep_']
10
  DEFAULT_ENTS = ['CARDINAL', 'DATE', 'EVENT', 'FAC', 'GPE', 'LANGUAGE', 'LAW', 'LOC', 'MONEY',
11
  'NORP', 'ORDINAL', 'ORG', 'PERCENT', 'PERSON', 'PRODUCT', 'QUANTITY', 'TIME', 'WORK_OF_ART']
12
 
13
- nlp = spacy.load("en_core_web_sm")
14
- nlp2 = spacy.load("en_core_web_md")
15
 
 
 
 
 
 
 
 
 
 
 
16
 
17
- def dependency(text, col_punct, col_phrase, compact):
 
 
 
 
 
18
  doc = nlp(text)
19
  options = {"compact": compact, "collapse_phrases": col_phrase,
20
  "collapse_punct": col_punct}
@@ -22,19 +35,16 @@ def dependency(text, col_punct, col_phrase, compact):
22
  return html
23
 
24
 
25
- def entity(text, ents):
 
26
  doc = nlp(text)
27
  options = {"ents": ents}
28
  html = displacy.render(doc, style="ent", options=options)
29
  return html
30
 
31
 
32
- def text(default):
33
- if default:
34
- return default
35
-
36
-
37
- def token(text, attributes):
38
  data = []
39
  doc = nlp(text)
40
  for tok in doc:
@@ -45,8 +55,9 @@ def token(text, attributes):
45
  return data
46
 
47
 
48
- def vectors(text):
49
- doc = nlp2(text)
 
50
  n_chunks = [chunk for chunk in doc.noun_chunks]
51
  words = [tok for tok in doc if not tok.is_stop and tok.pos_ not in [
52
  'PUNCT', "PROPN"]]
@@ -55,7 +66,8 @@ def vectors(text):
55
  return round(choice[0].similarity(choice[1]), 2), choice[0].text, choice[1].text
56
 
57
 
58
- def span(text, span1, span2, label1, label2):
 
59
  doc = nlp(text)
60
  idx1_1 = 0
61
  idx1_2 = 0
@@ -88,8 +100,9 @@ def span(text, span1, span2, label1, label2):
88
  demo = gr.Blocks()
89
 
90
  with demo:
91
- # gr.Markdown("Input text here!")
92
  text_input = gr.Textbox(value=DEFAULT_TEXT, interactive=True)
 
 
93
  with gr.Tabs():
94
  with gr.TabItem("Dependency"):
95
  col_punct = gr.Checkbox(label="Collapse Punctuation", value=True)
@@ -102,9 +115,11 @@ with demo:
102
  entity_output = gr.HTML()
103
  entity_button = gr.Button("Generate")
104
  with gr.TabItem("Tokens"):
105
- tok_input = gr.CheckboxGroup(
106
- DEFAULT_TOK_ATTR, value=DEFAULT_TOK_ATTR)
107
- tok_output = gr.Dataframe()
 
 
108
  tok_button = gr.Button("Generate")
109
  with gr.TabItem("Similarity"):
110
  sim_text1 = gr.Textbox(value="David Bowie", label="Chosen")
@@ -112,21 +127,26 @@ with demo:
112
  sim_output = gr.Textbox(value="0.09", label="Similarity Score")
113
  sim_button = gr.Button("Generate")
114
  with gr.TabItem("Spans"):
115
- span1 = gr.Textbox(value="David Bowie", label="Span 1")
116
- label1 = gr.Textbox(value="Full Name", label="Label for Span 1")
117
- span2 = gr.Textbox(value="David", label="Span 2")
118
- label2 = gr.Textbox(value="First Name", label="Label for Span 2")
 
 
 
 
119
  span_output = gr.HTML()
120
  span_button = gr.Button("Generate")
121
 
122
  depen_button.click(dependency, inputs=[
123
- text_input, col_punct, col_phrase, compact], outputs=depen_output)
124
  entity_button.click(
125
- entity, inputs=[text_input, entity_input], outputs=entity_output)
126
- tok_button.click(token, inputs=[text_input, tok_input], outputs=tok_output)
127
- sim_button.click(vectors, inputs=[text_input], outputs=[
 
128
  sim_output, sim_text1, sim_text2])
129
  span_button.click(
130
- span, inputs=[text_input, span1, span2, label1, label2], outputs=span_output)
131
 
132
  demo.launch()
 
4
  from spacy.tokens import Span
5
  import gradio as gr
6
 
7
+ DEFAULT_MODEL = "en_core_web"
8
  DEFAULT_TEXT = "David Bowie moved to the US in 1974, initially staying in New York City before settling in Los Angeles."
9
  DEFAULT_TOK_ATTR = ['idx', 'text', 'pos_', 'lemma_', 'shape_', 'dep_']
10
  DEFAULT_ENTS = ['CARDINAL', 'DATE', 'EVENT', 'FAC', 'GPE', 'LANGUAGE', 'LAW', 'LOC', 'MONEY',
11
  'NORP', 'ORDINAL', 'ORG', 'PERCENT', 'PERSON', 'PRODUCT', 'QUANTITY', 'TIME', 'WORK_OF_ART']
12
 
 
 
13
 
14
def get_all_models(path="requirements.txt", required_sizes=("sm", "md")):
    """Return the spaCy model base names listed in a requirements file.

    Scans *path* for lines containing a ``huggingface.co`` URL of the form
    ``https://huggingface.co/spacy/<lang>_<type>_<genre>_<size>/...`` and
    collects the ``<lang>_<type>_<genre>`` prefix of each package name
    (for example ``en_core_web`` from ``en_core_web_sm``).

    The handlers in this app build the package name as ``model + "_sm"`` or
    ``model + "_md"``, so a base name is only returned when every size in
    *required_sizes* is listed for it.  This keeps transformer-only families
    such as ``de_dep_news`` (which only ship a ``_trf`` wheel) out of the
    model dropdown, where selecting them would fail at load time.

    Args:
        path: Requirements file to scan.
        required_sizes: Size suffixes that must all be present for a base
            name to be returned.  Pass an empty tuple to list every base
            name found.

    Returns:
        Base names in the order they first appear in the file, without
        duplicates.
    """
    # Plain dicts keep insertion order, which preserves first-seen ordering.
    sizes_by_base = {}
    with open(path, encoding="utf-8") as f:
        for line in f:
            if "huggingface.co" not in line:
                continue
            parts = line.strip().split("/")
            # parts[4] is the package segment: scheme, "", host, org, package.
            if len(parts) < 5:
                continue
            pieces = parts[4].split("_")
            if len(pieces) < 4:
                # Not a <lang>_<type>_<genre>_<size> package name.
                continue
            base = "_".join(pieces[:3])
            sizes_by_base.setdefault(base, set()).add(pieces[3])
    return [
        base
        for base, sizes in sizes_by_base.items()
        if sizes.issuperset(required_sizes)
    ]
24
 
25
+
26
+ models = get_all_models()
27
+
28
+
29
+ def dependency(text, col_punct, col_phrase, compact, model):
30
+ nlp = spacy.load(model + "_sm")
31
  doc = nlp(text)
32
  options = {"compact": compact, "collapse_phrases": col_phrase,
33
  "collapse_punct": col_punct}
 
35
  return html
36
 
37
 
38
def entity(text, ents, model):
    """Render the named entities found in *text* as displaCy HTML.

    Loads the ``_sm`` pipeline for the model base name *model*, runs it over
    *text*, and renders the result with displaCy's "ent" style, passing
    *ents* through as the ``ents`` render option.
    """
    pipeline = spacy.load(model + "_sm")
    parsed = pipeline(text)
    return displacy.render(parsed, style="ent", options={"ents": ents})
44
 
45
 
46
+ def token(text, attributes, model):
47
+ nlp = spacy.load(model + "_sm")
 
 
 
 
48
  data = []
49
  doc = nlp(text)
50
  for tok in doc:
 
55
  return data
56
 
57
 
58
+ def vectors(text, model):
59
+ nlp = spacy.load(model + "_md")
60
+ doc = nlp(text)
61
  n_chunks = [chunk for chunk in doc.noun_chunks]
62
  words = [tok for tok in doc if not tok.is_stop and tok.pos_ not in [
63
  'PUNCT', "PROPN"]]
 
66
  return round(choice[0].similarity(choice[1]), 2), choice[0].text, choice[1].text
67
 
68
 
69
+ def span(text, span1, span2, label1, label2, model):
70
+ nlp = spacy.load(model + "_sm")
71
  doc = nlp(text)
72
  idx1_1 = 0
73
  idx1_2 = 0
 
100
  demo = gr.Blocks()
101
 
102
  with demo:
 
103
  text_input = gr.Textbox(value=DEFAULT_TEXT, interactive=True)
104
+ model_input = gr.Dropdown(
105
+ choices=models, value=DEFAULT_MODEL, interactive=True)
106
  with gr.Tabs():
107
  with gr.TabItem("Dependency"):
108
  col_punct = gr.Checkbox(label="Collapse Punctuation", value=True)
 
115
  entity_output = gr.HTML()
116
  entity_button = gr.Button("Generate")
117
  with gr.TabItem("Tokens"):
118
+ with gr.Column():
119
+ tok_input = gr.CheckboxGroup(
120
+ DEFAULT_TOK_ATTR, value=DEFAULT_TOK_ATTR)
121
+ tok_output = gr.Dataframe(
122
+ headers=DEFAULT_TOK_ATTR, overflow_row_behaviour="paginate")
123
  tok_button = gr.Button("Generate")
124
  with gr.TabItem("Similarity"):
125
  sim_text1 = gr.Textbox(value="David Bowie", label="Chosen")
 
127
  sim_output = gr.Textbox(value="0.09", label="Similarity Score")
128
  sim_button = gr.Button("Generate")
129
  with gr.TabItem("Spans"):
130
+ with gr.Row():
131
+ span1 = gr.Textbox(value="David Bowie", label="Span 1")
132
+ label1 = gr.Textbox(value="Name",
133
+ label="Label for Span 1")
134
+ with gr.Row():
135
+ span2 = gr.Textbox(value="David", label="Span 2")
136
+ label2 = gr.Textbox(value="First",
137
+ label="Label for Span 2")
138
  span_output = gr.HTML()
139
  span_button = gr.Button("Generate")
140
 
141
  depen_button.click(dependency, inputs=[
142
+ text_input, col_punct, col_phrase, compact, model_input], outputs=depen_output)
143
  entity_button.click(
144
+ entity, inputs=[text_input, entity_input, model_input], outputs=entity_output)
145
+ tok_button.click(
146
+ token, inputs=[text_input, tok_input, model_input], outputs=tok_output)
147
+ sim_button.click(vectors, inputs=[text_input, model_input], outputs=[
148
  sim_output, sim_text1, sim_text2])
149
  span_button.click(
150
+ span, inputs=[text_input, span1, span2, label1, label2, model_input], outputs=span_output)
151
 
152
  demo.launch()
requirements.txt CHANGED
@@ -1,8 +1,94 @@
1
 
2
- en-core-web-md @ https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.3.0/en_core_web_md-3.3.0-py3-none-any.whl
3
- en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.3.0/en_core_web_sm-3.3.0-py3-none-any.whl
4
- fastapi==0.78.0
5
  gradio==3.0.18
6
  spacy==3.3.1
7
- spacy-legacy==3.0.9
8
- spacy-loggers==1.0.2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
 
 
 
 
2
  gradio==3.0.18
3
  spacy==3.3.1
4
+
5
+ https://huggingface.co/spacy/ca_core_news_lg/resolve/main/ca_core_news_lg-any-py3-none-any.whl
6
+ https://huggingface.co/spacy/ca_core_news_md/resolve/main/ca_core_news_md-any-py3-none-any.whl
7
+ https://huggingface.co/spacy/ca_core_news_sm/resolve/main/ca_core_news_sm-any-py3-none-any.whl
8
+ https://huggingface.co/spacy/ca_core_news_trf/resolve/main/ca_core_news_trf-any-py3-none-any.whl
9
+
10
+ https://huggingface.co/spacy/da_core_news_lg/resolve/main/da_core_news_lg-any-py3-none-any.whl
11
+ https://huggingface.co/spacy/da_core_news_md/resolve/main/da_core_news_md-any-py3-none-any.whl
12
+ https://huggingface.co/spacy/da_core_news_sm/resolve/main/da_core_news_sm-any-py3-none-any.whl
13
+ https://huggingface.co/spacy/da_core_news_trf/resolve/main/da_core_news_trf-any-py3-none-any.whl
14
+
15
+ https://huggingface.co/spacy/de_core_news_lg/resolve/main/de_core_news_lg-any-py3-none-any.whl
16
+ https://huggingface.co/spacy/de_core_news_md/resolve/main/de_core_news_md-any-py3-none-any.whl
17
+ https://huggingface.co/spacy/de_core_news_sm/resolve/main/de_core_news_sm-any-py3-none-any.whl
18
+ https://huggingface.co/spacy/de_dep_news_trf/resolve/main/de_dep_news_trf-any-py3-none-any.whl
19
+
20
+ https://huggingface.co/spacy/el_core_news_lg/resolve/main/el_core_news_lg-any-py3-none-any.whl
21
+ https://huggingface.co/spacy/el_core_news_md/resolve/main/el_core_news_md-any-py3-none-any.whl
22
+ https://huggingface.co/spacy/el_core_news_sm/resolve/main/el_core_news_sm-any-py3-none-any.whl
23
+
24
+ https://huggingface.co/spacy/en_core_web_lg/resolve/main/en_core_web_lg-any-py3-none-any.whl
25
+ https://huggingface.co/spacy/en_core_web_md/resolve/main/en_core_web_md-any-py3-none-any.whl
26
+ https://huggingface.co/spacy/en_core_web_sm/resolve/main/en_core_web_sm-any-py3-none-any.whl
27
+ https://huggingface.co/spacy/en_core_web_trf/resolve/main/en_core_web_trf-any-py3-none-any.whl
28
+
29
+ https://huggingface.co/spacy/es_core_news_lg/resolve/main/es_core_news_lg-any-py3-none-any.whl
30
+ https://huggingface.co/spacy/es_core_news_md/resolve/main/es_core_news_md-any-py3-none-any.whl
31
+ https://huggingface.co/spacy/es_core_news_sm/resolve/main/es_core_news_sm-any-py3-none-any.whl
32
+ https://huggingface.co/spacy/es_dep_news_trf/resolve/main/es_dep_news_trf-any-py3-none-any.whl
33
+
34
+ https://huggingface.co/spacy/fi_core_news_lg/resolve/main/fi_core_news_lg-any-py3-none-any.whl
35
+ https://huggingface.co/spacy/fi_core_news_md/resolve/main/fi_core_news_md-any-py3-none-any.whl
36
+ https://huggingface.co/spacy/fi_core_news_sm/resolve/main/fi_core_news_sm-any-py3-none-any.whl
37
+
38
+ https://huggingface.co/spacy/fr_core_news_lg/resolve/main/fr_core_news_lg-any-py3-none-any.whl
39
+ https://huggingface.co/spacy/fr_core_news_md/resolve/main/fr_core_news_md-any-py3-none-any.whl
40
+ https://huggingface.co/spacy/fr_core_news_sm/resolve/main/fr_core_news_sm-any-py3-none-any.whl
41
+ https://huggingface.co/spacy/fr_dep_news_trf/resolve/main/fr_dep_news_trf-any-py3-none-any.whl
42
+
43
+ https://huggingface.co/spacy/it_core_news_lg/resolve/main/it_core_news_lg-any-py3-none-any.whl
44
+ https://huggingface.co/spacy/it_core_news_md/resolve/main/it_core_news_md-any-py3-none-any.whl
45
+ https://huggingface.co/spacy/it_core_news_sm/resolve/main/it_core_news_sm-any-py3-none-any.whl
46
+
47
+ https://huggingface.co/spacy/ja_core_news_lg/resolve/main/ja_core_news_lg-any-py3-none-any.whl
48
+ https://huggingface.co/spacy/ja_core_news_md/resolve/main/ja_core_news_md-any-py3-none-any.whl
49
+ https://huggingface.co/spacy/ja_core_news_sm/resolve/main/ja_core_news_sm-any-py3-none-any.whl
50
+
51
+ https://huggingface.co/spacy/ko_core_news_lg/resolve/main/ko_core_news_lg-any-py3-none-any.whl
52
+ https://huggingface.co/spacy/ko_core_news_md/resolve/main/ko_core_news_md-any-py3-none-any.whl
53
+ https://huggingface.co/spacy/ko_core_news_sm/resolve/main/ko_core_news_sm-any-py3-none-any.whl
54
+
55
+ https://huggingface.co/spacy/lt_core_news_lg/resolve/main/lt_core_news_lg-any-py3-none-any.whl
56
+ https://huggingface.co/spacy/lt_core_news_md/resolve/main/lt_core_news_md-any-py3-none-any.whl
57
+ https://huggingface.co/spacy/lt_core_news_sm/resolve/main/lt_core_news_sm-any-py3-none-any.whl
58
+
59
+ https://huggingface.co/spacy/mk_core_news_lg/resolve/main/mk_core_news_lg-any-py3-none-any.whl
60
+ https://huggingface.co/spacy/mk_core_news_md/resolve/main/mk_core_news_md-any-py3-none-any.whl
61
+ https://huggingface.co/spacy/mk_core_news_sm/resolve/main/mk_core_news_sm-any-py3-none-any.whl
62
+
63
+ https://huggingface.co/spacy/nb_core_news_lg/resolve/main/nb_core_news_lg-any-py3-none-any.whl
64
+ https://huggingface.co/spacy/nb_core_news_md/resolve/main/nb_core_news_md-any-py3-none-any.whl
65
+ https://huggingface.co/spacy/nb_core_news_sm/resolve/main/nb_core_news_sm-any-py3-none-any.whl
66
+
67
+ https://huggingface.co/spacy/nl_core_news_lg/resolve/main/nl_core_news_lg-any-py3-none-any.whl
68
+ https://huggingface.co/spacy/nl_core_news_md/resolve/main/nl_core_news_md-any-py3-none-any.whl
69
+ https://huggingface.co/spacy/nl_core_news_sm/resolve/main/nl_core_news_sm-any-py3-none-any.whl
70
+
71
+ https://huggingface.co/spacy/pl_core_news_lg/resolve/main/pl_core_news_lg-any-py3-none-any.whl
72
+ https://huggingface.co/spacy/pl_core_news_md/resolve/main/pl_core_news_md-any-py3-none-any.whl
73
+ https://huggingface.co/spacy/pl_core_news_sm/resolve/main/pl_core_news_sm-any-py3-none-any.whl
74
+
75
+ https://huggingface.co/spacy/pt_core_news_lg/resolve/main/pt_core_news_lg-any-py3-none-any.whl
76
+ https://huggingface.co/spacy/pt_core_news_md/resolve/main/pt_core_news_md-any-py3-none-any.whl
77
+ https://huggingface.co/spacy/pt_core_news_sm/resolve/main/pt_core_news_sm-any-py3-none-any.whl
78
+
79
+ https://huggingface.co/spacy/ro_core_news_lg/resolve/main/ro_core_news_lg-any-py3-none-any.whl
80
+ https://huggingface.co/spacy/ro_core_news_md/resolve/main/ro_core_news_md-any-py3-none-any.whl
81
+ https://huggingface.co/spacy/ro_core_news_sm/resolve/main/ro_core_news_sm-any-py3-none-any.whl
82
+
83
+ https://huggingface.co/spacy/ru_core_news_lg/resolve/main/ru_core_news_lg-any-py3-none-any.whl
84
+ https://huggingface.co/spacy/ru_core_news_md/resolve/main/ru_core_news_md-any-py3-none-any.whl
85
+ https://huggingface.co/spacy/ru_core_news_sm/resolve/main/ru_core_news_sm-any-py3-none-any.whl
86
+
87
+ https://huggingface.co/spacy/sv_core_news_lg/resolve/main/sv_core_news_lg-any-py3-none-any.whl
88
+ https://huggingface.co/spacy/sv_core_news_md/resolve/main/sv_core_news_md-any-py3-none-any.whl
89
+ https://huggingface.co/spacy/sv_core_news_sm/resolve/main/sv_core_news_sm-any-py3-none-any.whl
90
+
91
+ https://huggingface.co/spacy/zh_core_web_lg/resolve/main/zh_core_web_lg-any-py3-none-any.whl
92
+ https://huggingface.co/spacy/zh_core_web_md/resolve/main/zh_core_web_md-any-py3-none-any.whl
93
+ https://huggingface.co/spacy/zh_core_web_sm/resolve/main/zh_core_web_sm-any-py3-none-any.whl
94
+ https://huggingface.co/spacy/zh_core_web_trf/resolve/main/zh_core_web_trf-any-py3-none-any.whl