Spaces:

poccio
/

ExtEnD

Build error

App Files Community

poccio committed on Mar 23, 2022

Commit

bb04844

•

1 Parent(s): 2c67aa0

initial commit

Browse files

Files changed (1) hide show

app.py +36 -42

app.py CHANGED Viewed

@@ -13,16 +13,40 @@ from classy.utils.streamlit import get_md_200_random_color_generator
 def main(
     model_checkpoint_path: str,
-    default_inventory_path: str,
     cuda_device: int,
 ):
     # setup examples
     examples = [
-        "Italy beat England and won Euro 2021.",
-        "Japan began the defence of their Asian Cup title with a lucky 2-1 win against Syria in a Group C championship match on Friday.",
         "The project was coded in Java.",
     ]
     # css rules
     st.write(
         """
@@ -69,13 +93,13 @@ def main(
             Given the sentence *After a long fight Superman saved Metropolis*, where *Superman* is the mention
             to disambiguate, ExtEnD first concatenates the descriptions of all the possible candidates of *Superman* in the
             inventory and then selects the span whose description best suits the mention in its context.
-            To convert this task to end2end entity linking, as we do in *Model demo*, we leverage spaCy
-            (more specifically, its NER) and run ExtEnD on each named entity spaCy identifies
-            (if the corresponding mention is contained in the inventory).
-            Links:
-             * [full paper](https://www.researchgate.net/publication/359392427_ExtEnD_Extractive_Entity_Disambiguation)
              * [GitHub](https://github.com/SapienzaNLP/extend)
         """
         )
@@ -84,25 +108,6 @@ def main(
     def demo():
         st.markdown("## Demo")
-        @st.cache(allow_output_mutation=True)
-        def load_resources(inventory_path):
-            # load nlp
-            nlp = spacy.load("en_core_web_sm")
-            extend_config = dict(
-                checkpoint_path=model_checkpoint_path,
-                mentions_inventory_path=inventory_path,
-                device=cuda_device,
-                tokens_per_batch=10_000,
-            )
-            nlp.add_pipe("extend", after="ner", config=extend_config)
-            # mock call to load resources
-            nlp(examples[0])
-            # return
-            return nlp
         # read input
         placeholder = st.selectbox(
             "Examples",
@@ -111,24 +116,14 @@ def main(
         )
         input_text = st.text_area("Input text to entity-disambiguate", placeholder)
-        # custom inventory
-        uploaded_inventory_path = st.file_uploader(
-            "[Optional] Upload custom inventory (tsv file, mention \\t desc1 \\t desc2 \\t)",
-            accept_multiple_files=False,
-            type=["tsv"],
-        )
-        if uploaded_inventory_path is not None:
-            inventory_path = f"data/inventories/{uploaded_inventory_path.name}"
-            with open(inventory_path, "wb") as f:
-                f.write(uploaded_inventory_path.getbuffer())
-        else:
-            inventory_path = default_inventory_path
         # load model and color generator
         nlp = load_resources(inventory_path)
         color_generator = get_md_200_random_color_generator()
-        if st.button("Disambiguate", key="classify"):
             # tag sentence
             time_start = time.perf_counter()
@@ -184,7 +179,6 @@ def main(
     hiw()
 if __name__ == "__main__":
     main(
         "experiments/extend-longformer-large/2021-10-22/09-11-39/checkpoints/best.ckpt",

 def main(
     model_checkpoint_path: str,
+    inventory_path: str,
     cuda_device: int,
 ):
     # setup examples
     examples = [
+        "Rome is in Italy",
+        "Japan began the defence of their title with a lucky 2-1 win against Syria in a Group C championship match on Friday.",
         "The project was coded in Java.",
     ]
+    # define load_resources
+    @st.cache(allow_output_mutation=True)
+    def load_resources(inventory_path):
+        # load nlp
+        nlp = spacy.load("en_core_web_sm")
+        extend_config = dict(
+            checkpoint_path=model_checkpoint_path,
+            mentions_inventory_path=inventory_path,
+            device=cuda_device,
+            tokens_per_batch=10_000,
+        )
+        nlp.add_pipe("extend", after="ner", config=extend_config)
+        # mock call to load resources
+        nlp(examples[0])
+        # return
+        return nlp
+    # preload default resources
+    load_resources(inventory_path)
     # css rules
     st.write(
         """
             Given the sentence *After a long fight Superman saved Metropolis*, where *Superman* is the mention
             to disambiguate, ExtEnD first concatenates the descriptions of all the possible candidates of *Superman* in the
             inventory and then selects the span whose description best suits the mention in its context.
+            To use ExtEnD for full end2end entity linking, as we do in *Demo*, we just need to leverage a mention
+            identifier. Here [we use spaCy](https://github.com/SapienzaNLP/extend#spacy) (more specifically, its NER) and run ExtEnD on each named
+            entity spaCy identifies (if the corresponding mention is contained in the inventory).
+            ##### Links:
+             * [Full Paper](https://www.researchgate.net/publication/359392427_ExtEnD_Extractive_Entity_Disambiguation)
              * [GitHub](https://github.com/SapienzaNLP/extend)
         """
         )
     def demo():
         st.markdown("## Demo")
         # read input
         placeholder = st.selectbox(
             "Examples",
         )
         input_text = st.text_area("Input text to entity-disambiguate", placeholder)
+        # button
+        should_disambiguate = st.button("Disambiguate", key="classify")
         # load model and color generator
         nlp = load_resources(inventory_path)
         color_generator = get_md_200_random_color_generator()
+        if should_disambiguate:
             # tag sentence
             time_start = time.perf_counter()
     hiw()
 if __name__ == "__main__":
     main(
         "experiments/extend-longformer-large/2021-10-22/09-11-39/checkpoints/best.ckpt",