DeDeckerThomas committed
Commit • 55dc8b1
1 Parent(s): 8cbff17
Small bug fixes + Code clean-up
README.md
CHANGED
@@ -8,15 +8,6 @@ sdk_version: 1.2.0
 app_file: app.py
 pinned: false
 license: mit
-models:
-  - DeDeckerThomas/keyphrase-extraction-kbir-inspec
-  - DeDeckerThomas/keyphrase-extraction-distilbert-openkp
-  - DeDeckerThomas/keyphrase-extraction-distilbert-kptimes
-  - DeDeckerThomas/keyphrase-extraction-distilbert-inspec
-  - DeDeckerThomas/keyphrase-extraction-kbir-kpcrowd
-  - DeDeckerThomas/keyphrase-generation-keybart-inspec
-  - DeDeckerThomas/keyphrase-generation-t5-small-inspec
-  - DeDeckerThomas/keyphrase-generation-t5-small-openkp
 ---

 Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
app.py
CHANGED
@@ -1,12 +1,12 @@
+import re
+import string
+
+import orjson
 import streamlit as st
-
+from annotated_text.util import get_annotated_html
+
 from pipelines.keyphrase_extraction_pipeline import KeyphraseExtractionPipeline
 from pipelines.keyphrase_generation_pipeline import KeyphraseGenerationPipeline
-import orjson
-
-from annotated_text.util import get_annotated_html
-import re
-import string


 @st.cache(allow_output_mutation=True, show_spinner=False)
@@ -28,7 +28,7 @@ def extract_keyphrases():
     st.session_state.current_run_id += 1


-def get_annotated_text(text, keyphrases):
+def get_annotated_text(text, keyphrases, color="#d294ff"):
     for keyphrase in keyphrases:
         text = re.sub(
             rf"({keyphrase})([^A-Za-z])",
@@ -60,7 +60,7 @@ def get_annotated_text(text, keyphrases):
                         word,
                     ),
                     "KEY",
-                    "#d294ff",
+                    color,
                 )
             )
         else:
@@ -73,25 +73,36 @@ def get_annotated_text(text, keyphrases):
     return result


 def render_output(layout, runs, reverse=False):
     runs = list(runs.values())[::-1] if reverse else list(runs.values())
     for run in runs:
-        layout.markdown(f"**βοΈ Output run {run.get('run_id')}**")
-
-        layout.markdown(f"**Model**: {run.get('model')}")
-        result = get_annotated_text(run.get("text"), list(run.get("keyphrases")))
-
         layout.markdown(
-            get_annotated_html(*result),
+            f"""
+            <p style=\"margin-bottom: 0rem\"><strong>Run:</strong> {run.get('run_id')}</p>
+            <p style=\"margin-bottom: 0rem\"><strong>Model:</strong> {run.get('model')}</p>
+            """,
             unsafe_allow_html=True,
         )
-
+
+        if "generation" in run.get("model"):
             abstractive_keyphrases = [
                 keyphrase
                 for keyphrase in run.get("keyphrases")
                 if keyphrase.lower() not in run.get("text").lower()
             ]
-            layout.
+            layout.markdown(
+                f"<p style=\"margin-bottom: 0rem\"><strong>Absent keyphrases:</strong> {', '.join(abstractive_keyphrases) if abstractive_keyphrases else 'None' }</p>",
+                unsafe_allow_html=True,
+            )
+
+        result = get_annotated_text(run.get("text"), list(run.get("keyphrases")))
+        layout.markdown(
+            f"""
+            <p style="margin-bottom: 0.5rem"><strong>Text:</strong></p>
+            {get_annotated_html(*result)}
+            """,
+            unsafe_allow_html=True,
+        )
         layout.markdown("---")


@@ -102,10 +113,6 @@ if "config" not in st.session_state:
     st.session_state.history = {}
     st.session_state.keyphrases = []
     st.session_state.current_run_id = 1
-    st.session_state.chosen_model = st.session_state.config.get("models")[0]
-
-if "select_rows" not in st.session_state:
-    st.session_state.selected_rows = []

 st.set_page_config(
     page_icon="π",
@@ -130,11 +137,8 @@ context of a document, which is quite an improvement.

 This space gives you the ability to test around with some keyphrase extraction and generation models.
 Keyphrase extraction models are transformers models fine-tuned as a token classification problem where
-the tokens in a text are annotated as
-
-* B: Beginning of a keyphrase
-* I: Inside a keyphrases
-* O: Outside a keyhprase.
+the tokens in a text are annotated as B (Beginning of a keyphrase), I (Inside a keyphrases),
+and O (Outside a keyhprase).

 While keyphrase extraction can only extract keyphrases from a given text. Keyphrase generation models
 work a bit differently. Here you use an encoder-decoder model like BART to generate keyphrases from a given text.
@@ -156,23 +160,27 @@ with st.form("keyphrase-extraction-form"):
         f"For more information about the chosen model, please be sure to check out the [π€ Model Card](https://huggingface.co/DeDeckerThomas/{st.session_state.chosen_model})."
     )

-    st.session_state.input_text = st.text_area(
-        "β Input", st.session_state.config.get("example_text"), height=250
-    )
+    st.session_state.input_text = (
+        st.text_area("β Input", st.session_state.config.get("example_text"), height=250)
+        .replace("\n", " ")
+        .strip()
+    )

     with st.spinner("Extracting keyphrases..."):
         pressed = st.form_submit_button("Extract")

-    if pressed:
+    if pressed and st.session_state.input_text != "":
         with st.spinner("Loading pipeline..."):
             pipe = load_pipeline(
                 f"{st.session_state.config.get('model_author')}/{st.session_state.chosen_model}"
             )
         with st.spinner("Extracting keyphrases"):
             extract_keyphrases()
+    elif st.session_state.input_text == "":
+        st.error("The text input is empty π Please provide a text in the input field.")

     options = st.multiselect(
-        "Specify runs you want to see",
+        "Specify the runs you want to see",
         st.session_state.history.keys(),
         format_func=lambda run_id: f"Run {run_id.split('_')[1]}",
     )
pipelines/__pycache__/keyphrase_extraction_pipeline.cpython-39.pyc
CHANGED
Binary files a/pipelines/__pycache__/keyphrase_extraction_pipeline.cpython-39.pyc and b/pipelines/__pycache__/keyphrase_extraction_pipeline.cpython-39.pyc differ
pipelines/__pycache__/keyphrase_generation_pipeline.cpython-39.pyc
CHANGED
Binary files a/pipelines/__pycache__/keyphrase_generation_pipeline.cpython-39.pyc and b/pipelines/__pycache__/keyphrase_generation_pipeline.cpython-39.pyc differ
pipelines/keyphrase_extraction_pipeline.py
CHANGED
@@ -1,10 +1,10 @@
+import numpy as np
 from transformers import (
-    TokenClassificationPipeline,
     AutoModelForTokenClassification,
     AutoTokenizer,
+    TokenClassificationPipeline,
 )
 from transformers.pipelines import AggregationStrategy
-import numpy as np


 class KeyphraseExtractionPipeline(TokenClassificationPipeline):
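The diff above only touches this file's imports. For orientation, here is a minimal sketch of how a TokenClassificationPipeline subclass of this kind is commonly wired up; the class body (constructor and postprocess) is an assumption for illustration, not this file's actual contents.

import numpy as np
from transformers import (
    AutoModelForTokenClassification,
    AutoTokenizer,
    TokenClassificationPipeline,
)
from transformers.pipelines import AggregationStrategy


# Assumed implementation: load a model and tokenizer from a checkpoint name and
# aggregate the B/I/O token predictions into whole, deduplicated keyphrases.
class KeyphraseExtractionPipeline(TokenClassificationPipeline):
    def __init__(self, model, *args, **kwargs):
        super().__init__(
            model=AutoModelForTokenClassification.from_pretrained(model),
            tokenizer=AutoTokenizer.from_pretrained(model),
            *args,
            **kwargs,
        )

    def postprocess(self, model_outputs):
        results = super().postprocess(
            model_outputs=model_outputs,
            aggregation_strategy=AggregationStrategy.FIRST,
        )
        # Each aggregated entity is one keyphrase; strip and deduplicate them.
        return np.unique([result.get("word").strip() for result in results])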
pipelines/keyphrase_generation_pipeline.py
CHANGED
@@ -1,10 +1,8 @@
-from transformers import (
-    Text2TextGenerationPipeline,
-    AutoModelForSeq2SeqLM,
-    AutoTokenizer,
-)
 import string

+from transformers import (AutoModelForSeq2SeqLM, AutoTokenizer,
+                          Text2TextGenerationPipeline)
+

 class KeyphraseGenerationPipeline(Text2TextGenerationPipeline):
     def __init__(self, model, keyphrase_sep_token=";", *args, **kwargs):
@@ -20,11 +18,11 @@ class KeyphraseGenerationPipeline(Text2TextGenerationPipeline):
         results = super().postprocess(model_outputs=model_outputs)
         return [
             [
-                keyphrase.strip().translate(str.maketrans(
+                keyphrase.strip().translate(str.maketrans("", "", string.punctuation))
                 for keyphrase in result.get("generated_text").split(
                     self.keyphrase_sep_token
                 )
-                if keyphrase.translate(str.maketrans(
+                if keyphrase.translate(str.maketrans("", "", string.punctuation)) != ""
             ]
             for result in results
         ][0]
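The reworked postprocess splits the generated text on the separator token and uses str.maketrans("", "", string.punctuation) to build a translation table that deletes all punctuation, dropping any keyphrase that is empty after cleaning. A small standalone illustration of that cleanup; the sample generated_text string is invented:

import string

# A translation table that deletes every punctuation character.
strip_punct = str.maketrans("", "", string.punctuation)

generated_text = "keyphrase extraction; transformers;"  # hypothetical model output
keyphrase_sep_token = ";"

keyphrases = [
    kp.strip().translate(strip_punct)
    for kp in generated_text.split(keyphrase_sep_token)
    if kp.translate(strip_punct) != ""  # drop pieces that are empty once punctuation is removed
]
print(keyphrases)  # ['keyphrase extraction', 'transformers']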
requirements.txt
CHANGED
@@ -2,4 +2,4 @@ orjson==3.6.8
 transformers[torch]==4.17.0
 pandas==1.4.1
 numpy==1.22.3
-st-annotated-text==3.0.0
+st-annotated-text==3.0.0