Spaces:
Sleeping
Sleeping
File size: 6,801 Bytes
14fa848 30d04c6 197e844 7551cdd 14fa848 30d04c6 b5dd5bc 30d04c6 b5dd5bc 30d04c6 b5dd5bc 30d04c6 7551cdd 197e844 7551cdd 197e844 7551cdd 197e844 c89a5c0 100e8bf 7edd56d 100e8bf c89a5c0 100e8bf c89a5c0 100e8bf c89a5c0 100e8bf 197e844 c89a5c0 197e844 c89a5c0 197e844 c89a5c0 197e844 5c82e3e 197e844 30d04c6 7edd56d c89a5c0 7edd56d 30d04c6 7551cdd 30d04c6 b5dd5bc 30d04c6 7551cdd 30d04c6 7551cdd 30d04c6 7551cdd 30d04c6 7551cdd 30d04c6 7551cdd 30d04c6 7551cdd 30d04c6 7551cdd 30d04c6 100e8bf 30d04c6 c89a5c0 197e844 30d04c6 5c82e3e 7551cdd 5c82e3e |
|
import streamlit as st
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForTokenClassification
import pandas as pd
from pprint import pprint
@st.cache_resource()
def load_trained_model():
    """Build the token-classification pipeline used by the app.

    The tokenizer and the model are both loaded from the
    ``LampOfSocrates/bert-cased-plodcw-sourav`` checkpoint, the label mapping
    stored in the model config is printed, and the two are wrapped in a
    ``"ner"`` pipeline. The function is decorated with ``st.cache_resource``.

    Returns:
        The ``pipeline("ner", ...)`` object built from that model and tokenizer.
    """
    checkpoint = "LampOfSocrates/bert-cased-plodcw-sourav"
    ner_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    ner_model = AutoModelForTokenClassification.from_pretrained(checkpoint)

    # Report the id -> label mapping carried by the model config.
    print(f"Can recognise the following labels {ner_model.config.id2label}")

    return pipeline("ner", model=ner_model, tokenizer=ner_tokenizer)
@st.cache_data()
def load_plod_cw_dataset():
    """Load the ``surrey-nlp/PLOD-CW`` dataset with ``datasets.load_dataset``.

    The function is decorated with ``st.cache_data``.

    Returns:
        Whatever ``load_dataset("surrey-nlp/PLOD-CW")`` returns.
    """
    # Function-scope import: `datasets` is only needed once the data is requested.
    from datasets import load_dataset as fetch_dataset

    return fetch_dataset("surrey-nlp/PLOD-CW")
def load_random_examples(dataset_name, num_examples=1):
    """Sample random rows from the test split of the PLOD-CW dataset.

    Args:
        dataset_name (str): Accepted for interface compatibility but not used;
            the data always comes from ``load_plod_cw_dataset()``.
        num_examples (int): How many rows to sample. The default is 1, which
            is what this function always returned while the argument was
            still being ignored. Values outside ``0..len(test split)`` are
            clamped into that range so sampling cannot raise.

    Returns:
        pd.DataFrame: Two rows labelled ``tokens`` and ``ner_tags`` with one
        column per sampled example.
    """
    dat = load_plod_cw_dataset()
    # Convert the test split to a pandas DataFrame so it can be sampled.
    df = pd.DataFrame(dat['test'])

    # DataFrame.sample raises when asked for more rows than exist
    # (without replacement), so clamp the request first.
    sample_size = max(0, min(num_examples, len(df)))
    random_examples = df.sample(n=sample_size)

    tokens = random_examples.tokens
    ner_tags = random_examples.ner_tags
    # Each Series becomes one row of the result, keeping the layout the
    # page has always displayed.
    return pd.DataFrame((tokens, ner_tags))
def render_entities(tokens, entities):
    """Show tokens next to their entities in a two-column Streamlit table.

    Injects the table styling, writes the page title and description, and
    then hands ``{"Token": tokens, "Entity": entities}`` to ``st.table``.

    Args:
        tokens: Values for the "Token" column.
        entities: Values for the "Entity" column.
    """
    # Styling for the table; sent as raw HTML, hence unsafe_allow_html below.
    table_css = """
<style>
body {
font-family: 'Arial', sans-serif;
background-color: #f0f0f5;
color: #333333;
}
table {
width: 100%;
border-collapse: collapse;
}
th, td {
padding: 12px;
text-align: left;
border-bottom: 1px solid #dddddd;
}
th {
background-color: #4CAF50;
color: white;
width: 16.66%;
}
tr:hover {
background-color: #f5f5f5;
}
td {
width: 16.66%;
}
</style>
"""
    st.markdown(table_css, unsafe_allow_html=True)

    # Heading and short description above the table.
    st.title("Model predicted Token vs Entities Table")
    st.write("This table shows the entity corresponding to each token in a cool and chilled theme.")

    # One column per argument.
    st.table(dict(Token=tokens, Entity=entities))
def render_random_examples():
    """Show random PLOD-CW examples in a Streamlit table with a refresh button.

    The examples are kept in ``st.session_state['random_examples']``. They are
    (re)loaded when the button is pressed or when nothing is stored yet, and
    the stored value is then displayed with ``st.table``.
    """
    # Styling for the table; sent as raw HTML, hence unsafe_allow_html below.
    table_css = """
<style>
body {
font-family: 'Arial', sans-serif;
background-color: #f0f0f5;
color: #333333;
}
table {
width: 100%;
border-collapse: collapse;
}
th, td {
padding: 12px;
text-align: left;
border-bottom: 1px solid #dddddd;
}
th {
background-color: #4CAF50;
color: white;
width: 16.66%;
}
tr:hover {
background-color: #f5f5f5;
}
td {
width: 16.66%;
}
</style>
"""
    st.markdown(table_css, unsafe_allow_html=True)

    # Heading and short description above the table.
    st.title("Random Examples from PLOD-CW")
    st.write("This table shows 1 random examples from the PLOD-CW dataset in a cool and chilled theme.")

    state = st.session_state
    # The button is always rendered; a press or an empty session both
    # trigger exactly one (re)load.
    refresh_requested = st.button('Show another set of random examples')
    if refresh_requested or 'random_examples' not in state:
        state['random_examples'] = load_random_examples("surrey-nlp/PLOD-CW")

    st.table(state['random_examples'])
def predict_using_trained(sentence):
    """Run the NER pipeline from ``load_trained_model()`` on ``sentence``.

    Args:
        sentence: Text to tag. ``None`` or a blank string (for example when
            the ``sentence`` query parameter is missing) is not sent to the
            pipeline.

    Returns:
        The pipeline's output for ``sentence``, or ``[]`` when there is
        nothing to tag.
    """
    # Guard before loading the model: there is nothing to predict, and
    # handing None to the pipeline would fail.
    if sentence is None or (isinstance(sentence, str) and not sentence.strip()):
        return []

    ner = load_trained_model()
    return ner(sentence)
def _entities_to_html(text, entities, label_colors):
    """Return ``text`` as HTML with every entity span in a coloured box.

    Args:
        text (str): The sentence the entities were predicted on.
        entities: Iterable of dicts with ``'start'``, ``'end'`` (character
            offsets into ``text``) and ``'entity'`` (the label) keys.
        label_colors (dict): Maps a label to a CSS colour; labels that are
            not in the mapping fall back to ``'lightgray'``.

    Returns:
        str: A ``<div>`` in which spaces of the non-entity text are turned
        into ``<br>`` and each entity is wrapped in a styled ``<div>``. All
        text taken from ``text`` is HTML-escaped.
    """
    from html import escape

    def plain(fragment):
        # Escape first so user-typed markup is shown literally, then apply
        # the layout rule that every space becomes a line break.
        return escape(fragment).replace(" ", "<br>")

    parts = ["<div>"]
    last_idx = 0
    for entity in entities:
        start, end = entity['start'], entity['end']
        color = label_colors.get(entity['entity'], 'lightgray')
        # Text between the previous entity and this one.
        parts.append(plain(text[last_idx:start]))
        # The entity itself, highlighted.
        parts.append(
            f'<div style="background-color: {color}; padding: 5px; '
            f'border-radius: 3px; margin: 5px 0;">{escape(text[start:end])}</div>'
        )
        last_idx = end
    # Whatever follows the last entity.
    parts.append(plain(text[last_idx:]))
    parts.append("</div>")
    return "".join(parts)


def prep_page():
    """Render the interactive NER page.

    Shows a text area; when it contains text, runs the model on it, prints
    the raw predictions, displays the highlighted sentence and a
    token/entity table, and finally renders the random dataset examples.
    """
    model = load_trained_model()

    st.title("Named Entity Recognition with BERT on PLOD-CW")
    st.write("Enter a sentence to see the named entities recognized by the model.")

    text = st.text_area("Enter your sentence here:")

    if text:
        st.write("Entities recognized:")
        entities = model(text)
        pprint(entities)

        # Highlight colour per entity label.
        label_colors = {
            'B-LF': 'lightblue',
            'B-O': 'lightgreen',
            'B-AC': 'lightcoral',
            'I-LF': 'lightyellow',
        }

        # The markup is rendered with unsafe_allow_html, so the helper
        # escapes everything that originates from the user's input.
        styled_text = _entities_to_html(text, entities, label_colors)
        st.markdown(styled_text, unsafe_allow_html=True)

        # The table expects parallel sequences: one token and one label per
        # prediction, not the whole sentence plus the raw prediction dicts.
        tokens = [text[entity['start']:entity['end']] for entity in entities]
        labels = [entity['entity'] for entity in entities]
        render_entities(tokens, labels)

    render_random_examples()
if __name__ == '__main__':
    # Script entry point: an `api` query parameter switches from the
    # interactive page to a plain prediction response.
    params = st.query_params
    if 'api' not in params:
        prep_page()
    else:
        sentence = params.get('sentence')
        response = {
            "sentence": sentence,
            "entities": predict_using_trained(sentence),
        }
        # Echo to the server log and to the page.
        pprint(response)
        st.write(response)
|