# Scraped page header (not code): max-long's picture | "Update app.py" | commit 3d48f09 (verified)
import random
from gliner import GLiNER
import gradio as gr
from datasets import load_dataset
# Load the BL (British Library) books subset used as the snippet source.
_bl_books = load_dataset(
    "max-long/bl_books_textile_filter",
    split="train",
    trust_remote_code=True,
)

# Shuffle with a fixed seed so the snippet order is reproducible across runs,
# and expose a single module-level iterator that the UI draws snippets from.
dataset_iter = iter(_bl_books.shuffle(seed=42))
# Load the fine-tuned GLiNER checkpoint once at import time; `ner` reuses it
# for every request.
model = GLiNER.from_pretrained(
    "max-long/textile_machines_ner_5_oct",
    trust_remote_code=True,
)
def ner(text: str):
    """Tag "Textile Machinery" mentions in *text* with the GLiNER model.

    Args:
        text: The passage to analyse.

    Returns:
        A 2-tuple ``(highlighted, entities)``:

        * ``highlighted`` is a dict ``{"text": ..., "entities": [...]}`` where
          each entity has ``start``, ``end`` and ``entity`` keys, i.e. the
          dictionary form consumed by ``gr.HighlightedText``.
        * ``entities`` is a list of dicts with ``entity``, ``word``,
          ``start``, ``end`` and ``score`` keys, suitable for ``gr.JSON``.
    """
    # Single source of truth for the label: it is used both to query the model
    # and to filter its output, so the two can never drift apart.
    label = "Textile Machinery"
    threshold = 0.5

    # Nothing to tag in blank input; skip the model call and return empty
    # results in the same shape as the normal path.
    if not text or not text.strip():
        return {"text": text or "", "entities": []}, []

    # Predict entities using the fine-tuned GLiNER model.
    predictions = model.predict_entities(
        text, [label], flat_ner=True, threshold=threshold
    )

    # Keep only the requested label and normalise the key names for display.
    textile_entities = [
        {
            "entity": ent["label"],
            "word": ent["text"],
            "start": ent["start"],
            "end": ent["end"],
            "score": ent.get("score", 0),
        }
        for ent in predictions
        if ent["label"] == label
    ]

    # gr.HighlightedText only needs the character span and the label.
    highlights = [
        {"start": ent["start"], "end": ent["end"], "entity": ent["entity"]}
        for ent in textile_entities
    ]

    # Two outputs: one for the highlighted text, one for the JSON view.
    return {"text": text, "entities": highlights}, textile_entities
# Build the demo UI. Components and event listeners are created inside the
# Blocks context so they are registered on `demo`.
with gr.Blocks(title="Textile Machinery NER Demo") as demo:
    # The Markdown body is kept flush-left so the rendered text is unchanged.
    gr.Markdown(
        """
# Textile Machinery Entity Recognition Demo
This demo selects a random text snippet from a subset of the British Library's books dataset and identifies "Textile Machinery" entities using a fine-tuned GLiNER model.
"""
    )

    # Input box, pre-filled with an example passage.
    input_text = gr.Textbox(
        value="The machine is fed by means of an endless apron, the wool entering at the smaller end, so that when most entangled it is subjected to the least motion. This apron is a great improvement on former machines, which were filled by hand, an operation attended with danger, and sometimes resulting in accidents. By the revolutions of the cylinder, the wool is torn, disentangled, and cleaned, and by the gradually increasing centrifugal force, it is impelled forwards towards the large end of the cone, encountering in its way increased motion ; which, however, it is better able to bear by becoming less and less entangled at every revolution. When the wool thus reaches the base of the cone, it is tossed into a chamber, where it is received upon another end- less apron, moving in a direction from the machine instead of towards it. Over this apron is a cylindrical wire cage, which revolves on an axis disposed parallel to the apron, and im- mediately over it is a revolving fan. Both these are covered and protected by sheet iron casings, but communicate with the chamber which receives the wool from the cone. When the whole is at work, the fan, drawing the dust out of the chamber, blows it through a chimney, or pipe, connected with the machine for the purpose. The cage prevents the escape of the wool with the dust, and, by its passage over the apron, it lays down the wool in a continuous fleece. The coarser wools, destined for common cloths, are willied more than once ; for instance, before and after dyeing, and after oiling, and before they are scribbled ; the finer wools do not, however, require this, as the operation of scribbling is a sufficient preparation for carding. In the West of England, the wool is beaten with wooden rollers, by women, after which it is placed in a wire screen, or hurdle, and pulled with the hands, so as to get rid of any burs or pitch, or other dirt which may not have been separ- ated by the willy.",
        label="Text input",
        placeholder="Enter your text here",
        lines=5,
    )

    # Output components: highlighted spans plus the raw entity list.
    output_highlighted = gr.HighlightedText(label="Predicted Entities")
    output_entities = gr.JSON(label="Entities")

    submit_btn = gr.Button("Find Textile Machinery!")
    refresh_btn = gr.Button("Get New Snippet")

    def get_new_snippet():
        """Return the next usable snippet from the shared dataset iterator.

        Records whose ``text`` field is not a non-blank string are skipped.
        The number of records examined per call is capped so a long run of
        unusable records cannot stall the UI.

        Returns:
            The snippet text, or the message ``"No more snippets available."``
            when the iterator is exhausted or the cap is reached.
        """
        max_attempts = 1000  # Upper bound on records examined per click.
        for _ in range(max_attempts):
            record = next(dataset_iter, None)
            if record is None:
                break  # Iterator exhausted.
            snippet = record["text"]
            if isinstance(snippet, str) and snippet.strip():
                return snippet
        return "No more snippets available."

    # Connect refresh button: replace the textbox content with a new snippet.
    refresh_btn.click(fn=get_new_snippet, outputs=input_text)

    # Connect submit button: run NER and fill both output components.
    submit_btn.click(
        fn=ner,
        inputs=[input_text],
        outputs=[output_highlighted, output_entities],
    )
# Enable the request queue, then start the app with debug output and a
# public share link requested.
demo.queue().launch(debug=True, share=True)