"""Streamlit demo page: general-domain NER with the Spark NLP ``nerdl_fewnerd_100d`` model."""
import streamlit as st

# Kept directly after the streamlit import, as in the original layout:
# Streamlit requires set_page_config to be the first Streamlit command run.
st.set_page_config(
    layout="centered",  # Can be "centered" or "wide". In the future also "dashboard", etc.
    initial_sidebar_state="auto",  # Can be "auto", "expanded", "collapsed"
    # NOTE(review): the title says "Extractive Summarization" but this page is an NER demo — confirm.
    page_title='Extractive Summarization',  # String or None. Strings get appended with "• Streamlit".
    page_icon='./favicon.png',  # String, anything supported by st.image, or None.
)

import pandas as pd
import numpy as np

import os
import sys

# Make the helper modules that live next to this script importable.
sys.path.append(os.path.abspath('./'))
import streamlit_apps_config as config
from streamlit_ner_output import show_html2, jsl_display_annotations, get_color

import sparknlp
from sparknlp.base import *
from sparknlp.annotator import *
from pyspark.sql import functions as F
from sparknlp_display import NerVisualizer
from pyspark.ml import Pipeline
from pyspark.sql.types import StringType

spark = sparknlp.start()

## Marking down NER Style
st.markdown(config.STYLE_CONFIG, unsafe_allow_html=True)

root_path = config.project_path

########## To Remove the Main Menu Hamburger ########

# NOTE(review): this string was blank in the reviewed copy of the file; the CSS
# below is reconstructed from the section title above — confirm before deploying.
hide_menu_style = """
        <style>
        #MainMenu {visibility: hidden;}
        </style>
        """
st.markdown(hide_menu_style, unsafe_allow_html=True)

########## Side Bar ########

## loading logo(newer version with href)
import base64


@st.cache(allow_output_mutation=True)
def get_base64_of_bin_file(bin_file):
    """Read ``bin_file`` as bytes and return its contents as a base64 text string."""
    with open(bin_file, 'rb') as f:
        data = f.read()
    return base64.b64encode(data).decode()


@st.cache(allow_output_mutation=True)
def get_img_with_href(local_img_path, target_url):
    """Return an HTML snippet showing a local image as a link to ``target_url``.

    The image is embedded as a base64 data URI; its format is taken from the
    file extension of ``local_img_path``.
    """
    img_format = os.path.splitext(local_img_path)[-1].replace('.', '')
    bin_str = get_base64_of_bin_file(local_img_path)
    # NOTE(review): the markup was empty in the reviewed copy, leaving the three
    # values above unused; reconstructed here — confirm attributes/sizing.
    html_code = f'''
        <a href="{target_url}">
            <img src="data:image/{img_format};base64,{bin_str}" />
        </a>'''
    return html_code


logo_html = get_img_with_href('./jsl-logo.png', 'https://www.johnsnowlabs.com/')
st.sidebar.markdown(logo_html, unsafe_allow_html=True)

# sidebar info
model_name = ["nerdl_fewnerd_100d"]
st.sidebar.title("Pretrained model to test")
selected_model = st.sidebar.selectbox("", model_name)

######## Main Page #########

app_title = "Detect up to 8 entity types in general domain texts"
app_description = (
    "Named Entity Recognition model aimed to detect up to 8 entity types from "
    "general domain texts. This model was trained on the Few-NERD/inter public "
    "dataset using Spark NLP, and is available in Spark NLP Models hub "
    "(https://nlp.johnsnowlabs.com/models)"
)
st.title(app_title)
st.markdown("\n\n" + app_description + "\n\n", unsafe_allow_html=True)

if selected_model == "nerdl_fewnerd_100d":
    st.markdown("**`PERSON`** **,** **`ORGANIZATION`** **,** **`LOCATION`** **,** **`ART`** **,** **`BUILDING`** **,** **`PRODUCT`** **,** **`EVENT`** **,** **`OTHER`**", unsafe_allow_html=True)

st.subheader("")


#### Running model and creating pipeline
@st.cache(allow_output_mutation=True)
def _pipeline_store():
    """Return a dict that st.cache hands back unchanged on later script reruns.

    The fitted pipeline is stored inside this dict rather than being returned
    from a cached function directly, so the cached function itself references
    no Spark objects.
    """
    return {}


def _build_pipeline_model():
    """Assemble the NER pipeline and fit it on an empty frame to get a PipelineModel."""
    documentAssembler = DocumentAssembler()\
        .setInputCol("text")\
        .setOutputCol("document")

    sentenceDetector = SentenceDetector()\
        .setInputCols(["document"])\
        .setOutputCol("sentence")

    tokenizer = Tokenizer()\
        .setInputCols(["sentence"])\
        .setOutputCol("token")

    embeddings = WordEmbeddingsModel.pretrained("glove_100d")\
        .setInputCols(["sentence", "token"])\
        .setOutputCol("embeddings")

    # NOTE(review): this stage reads "document" while the embeddings and the
    # converter read "sentence" — left as in the original; confirm it is intended.
    ner = NerDLModel.pretrained("nerdl_fewnerd_100d")\
        .setInputCols(["document", "token", "embeddings"])\
        .setOutputCol("ner")

    ner_converter = NerConverter()\
        .setInputCols(["sentence", "token", "ner"])\
        .setOutputCol("ner_chunk")

    pipeline = Pipeline(
        stages=[
            documentAssembler,
            sentenceDetector,
            tokenizer,
            embeddings,
            ner,
            ner_converter,
        ])

    empty_df = spark.createDataFrame([[""]]).toDF("text")
    return pipeline.fit(empty_df)


def get_pipeline(text):
    """Run the NER pipeline on ``text`` and return the result as a pandas DataFrame.

    The original code had ``st.cache(...)`` as a bare statement above this
    function (no ``@``), so nothing was cached and the pretrained models were
    reloaded and the pipeline refit on every call. The fitted model is now
    built once and reused; only the transform runs per call.
    """
    store = _pipeline_store()
    if "model" not in store:
        store["model"] = _build_pipeline_model()

    text_df = spark.createDataFrame(pd.DataFrame({"text": [text]}))
    return store["model"].transform(text_df).toPandas()


text = st.text_input("Type here your text and press enter to run:")

result = get_pipeline(text)

# Displaying Ner Visualization
# One row per chunk found in the single input row.
df = pd.DataFrame({"ner_chunk": result["ner_chunk"].iloc[0]})

# Field 4 of each chunk is looked up by 'entity' (presumably the annotation
# metadata). Sorted so the sidebar options keep a stable order between reruns.
labels_set = sorted({chunk[4]['entity'] for chunk in df['ner_chunk'].values})

labels = st.sidebar.multiselect(
    "NER Labels", options=labels_set, default=list(labels_set)
)

show_html2(text, df, labels, "Text annotated with identified Named Entities")