# Hugging Face Spaces page header (Space status when captured: Sleeping)
import random | |
from gliner import GLiNER | |
import gradio as gr | |
from datasets import load_dataset | |
# --- One-time setup: snippet source and NER model (runs at import time) ---

# British Library books subset used as the pool of demo snippets.
_bl_books = load_dataset(
    "max-long/bl_books_textile_filter",
    split="train",
    trust_remote_code=True,
)

# Shuffle with a fixed seed, then expose a single module-level iterator that
# the "Get New Snippet" handler advances one row per click.
dataset_iter = iter(_bl_books.shuffle(seed=42))

# Fine-tuned GLiNER checkpoint used for entity prediction.
model = GLiNER.from_pretrained(
    "max-long/textile_machines_ner_5_oct",
    trust_remote_code=True,
)
def ner(text: str):
    """Detect "Textile Machinery" mentions in *text* with the GLiNER model.

    Args:
        text: Passage to analyse. Empty, whitespace-only or ``None`` input is
            answered with an empty result without calling the model.

    Returns:
        A 2-tuple ``(highlighted, entities)``:

        * ``highlighted`` -- ``{"text": ..., "entities": [...]}`` where each
          entity carries ``"start"``, ``"end"`` and ``"entity"``; this is the
          value handed to the ``gr.HighlightedText`` output.
        * ``entities`` -- list of dicts with keys ``"entity"``, ``"word"``,
          ``"start"``, ``"end"`` and ``"score"``, handed to the ``gr.JSON``
          output.
    """
    target_label = "Textile Machinery"
    threshold = 0.5

    # Nothing to analyse: skip inference instead of passing an empty string
    # (or None from a cleared textbox) to the model.
    if not text or not text.strip():
        return {"text": text or "", "entities": []}, []

    # Predict entities using the fine-tuned GLiNER model.
    predictions = model.predict_entities(
        text, [target_label], flat_ner=True, threshold=threshold
    )

    # Keep only the target label and normalise the field names for display.
    textile_entities = [
        {
            "entity": ent["label"],
            "word": ent["text"],
            "start": ent["start"],
            "end": ent["end"],
            "score": ent.get("score", 0),
        }
        for ent in predictions
        if ent["label"] == target_label
    ]

    # Character spans in the dictionary format used for highlighted text.
    highlights = [
        {"start": ent["start"], "end": ent["end"], "entity": ent["entity"]}
        for ent in textile_entities
    ]

    # First value feeds the highlighted view, second the JSON listing.
    return {"text": text, "entities": highlights}, textile_entities
# Build the demo UI: a text box, two result views, and two buttons.
with gr.Blocks(title="Textile Machinery NER Demo") as demo:
    gr.Markdown(
        "\n"
        "# Textile Machinery Entity Recognition Demo\n"
        "This demo selects a random text snippet from a subset of the British Library's books dataset and identifies \"Textile Machinery\" entities using a fine-tuned GLiNER model.\n"
    )

    # Editable input, pre-filled with a fixed sample passage; the
    # "Get New Snippet" button replaces it with a row from the dataset.
    input_text = gr.Textbox(
        value="The machine is fed by means of an endless apron, the wool entering at the smaller end, so that when most entangled it is subjected to the least motion. This apron is a great improvement on former machines, which were filled by hand, an operation attended with danger, and sometimes resulting in accidents. By the revolutions of the cylinder, the wool is torn, disentangled, and cleaned, and by the gradually increasing centrifugal force, it is impelled forwards towards the large end of the cone, encountering in its way increased motion ; which, however, it is better able to bear by becoming less and less entangled at every revolution. When the wool thus reaches the base of the cone, it is tossed into a chamber, where it is received upon another end- less apron, moving in a direction from the machine instead of towards it. Over this apron is a cylindrical wire cage, which revolves on an axis disposed parallel to the apron, and im- mediately over it is a revolving fan. Both these are covered and protected by sheet iron casings, but communicate with the chamber which receives the wool from the cone. When the whole is at work, the fan, drawing the dust out of the chamber, blows it through a chimney, or pipe, connected with the machine for the purpose. The cage prevents the escape of the wool with the dust, and, by its passage over the apron, it lays down the wool in a continuous fleece. The coarser wools, destined for common cloths, are willied more than once ; for instance, before and after dyeing, and after oiling, and before they are scribbled ; the finer wools do not, however, require this, as the operation of scribbling is a sufficient preparation for carding. In the West of England, the wool is beaten with wooden rollers, by women, after which it is placed in a wire screen, or hurdle, and pulled with the hands, so as to get rid of any burs or pitch, or other dirt which may not have been separ- ated by the willy.",
        label="Text input",
        placeholder="Enter your text here",
        lines=5,
    )

    # Output components: highlighted spans plus the raw entity list.
    output_highlighted = gr.HighlightedText(label="Predicted Entities")
    output_entities = gr.JSON(label="Entities")

    submit_btn = gr.Button("Find Textile Machinery!")
    refresh_btn = gr.Button("Get New Snippet")

    def get_new_snippet():
        """Return the next non-blank snippet from the shared dataset iterator.

        Rows whose ``"text"`` field is not a string or is blank are skipped,
        up to ``max_attempts`` rows per call.

        Returns:
            The snippet text, or ``"No more snippets available."`` when the
            iterator is exhausted or no usable row turned up within the
            attempt budget.
        """
        max_attempts = 1000  # Bound the number of rows skipped per click

        for _ in range(max_attempts):
            try:
                sample = next(dataset_iter)["text"]
            except StopIteration:
                break  # Dataset exhausted
            if isinstance(sample, str) and sample.strip():
                return sample

        return "No more snippets available."

    # Refresh button: load a new snippet into the text box.
    refresh_btn.click(fn=get_new_snippet, outputs=input_text)

    # Submit button: run NER and fill both output components.
    submit_btn.click(
        fn=ner,
        inputs=[input_text],
        outputs=[output_highlighted, output_entities],
    )
# Turn on the request queue, then start the app in debug mode with
# share=True (requests a public share link).
demo.queue().launch(debug=True, share=True)