Spaces:
Sleeping
Sleeping
File size: 5,227 Bytes
1dc581a 999a2cb 1dc581a 999a2cb 1dc581a 8ed11d6 0b6e959 68135f3 0b6e959 fc2149d 0b6e959 1dc581a 85c3256 8ed11d6 1dc581a a695cd7 8ed11d6 a695cd7 4fd99d4 85c3256 a695cd7 1dc581a a695cd7 999a2cb 8154f86 999a2cb 1dc581a 3d48f09 999a2cb 8154f86 8ed11d6 3d48f09 1dc581a 999a2cb a695cd7 999a2cb 4fd99d4 999a2cb 3e1e93b 999a2cb 4fd99d4 8ed11d6 4fd99d4 999a2cb a695cd7 999a2cb c09c624 999a2cb cbccbc9 c09c624 cbccbc9 75c03f0 999a2cb 4fd99d4 999a2cb 4fd99d4 999a2cb a695cd7 4fd99d4 999a2cb 1dc581a 999a2cb 3d48f09 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
import random
from gliner import GLiNER
import gradio as gr
from datasets import load_dataset
# Load the British Library (BL) books subset that snippets are drawn from.
# NOTE(review): trust_remote_code=True presumably allows code shipped with the
# Hub repository to execute locally -- confirm both repositories are trusted.
_bl_books = load_dataset(
    "max-long/bl_books_textile_filter",
    split="train",
    trust_remote_code=True,
)
# Shuffle with a fixed seed, then expose the rows as one shared iterator that
# is consumed one sample at a time.
dataset_iter = iter(_bl_books.shuffle(seed=42))

# Load the fine-tuned GLiNER model used for entity prediction.
model = GLiNER.from_pretrained(
    "max-long/textile_machines_ner_5_oct",
    trust_remote_code=True,
)
def ner(text: str):
    """Find "Textile Machinery" mentions in *text* with the fine-tuned GLiNER model.

    Args:
        text: Passage to analyse. An empty, whitespace-only or ``None`` value
            yields empty results without calling the model.

    Returns:
        A 2-tuple ``(highlighted, entities)``:

        * ``highlighted`` -- ``{"text": ..., "entities": [...]}`` in the
          dictionary format consumed by ``gr.HighlightedText``; each entity
          has ``start``, ``end`` and ``entity`` keys.
        * ``entities`` -- list of dicts with ``entity``, ``word``, ``start``,
          ``end`` and ``score`` keys, suitable for JSON display.
    """
    labels = ["Textile Machinery"]
    threshold = 0.5

    # Guard: there is nothing to tag in a blank input, so skip inference
    # instead of handing the model an empty (or None) string.
    if not text or not text.strip():
        return {"text": text or "", "entities": []}, []

    # Predict entities using the fine-tuned GLiNER model.
    entities = model.predict_entities(
        text, labels, flat_ner=True, threshold=threshold
    )

    # Keep only the requested labels and normalise the key names.
    textile_entities = [
        {
            "entity": ent["label"],
            "word": ent["text"],
            "start": ent["start"],
            "end": ent["end"],
            "score": ent.get("score", 0),  # score may be absent; default to 0
        }
        for ent in entities
        if ent["label"] in labels
    ]

    # Character spans for the colour-coded display.
    highlights = [
        {"start": ent["start"], "end": ent["end"], "entity": ent["entity"]}
        for ent in textile_entities
    ]

    # Two outputs: highlighted-text payload and the full entity list.
    return {"text": text, "entities": highlights}, textile_entities
def get_new_snippet():
    """Return the next non-blank snippet from the shared dataset iterator.

    Returns:
        The ``text`` field of the next sample whose text is a non-blank
        string, or ``"No more snippets available."`` when the iterator is
        exhausted or ``max_attempts`` samples in a row were blank.
    """
    max_attempts = 1000  # Upper bound on samples skipped per call
    for _ in range(max_attempts):
        sample = next(dataset_iter, None)
        if sample is None:
            break  # Ran out of snippets
        snippet = sample["text"]
        if isinstance(snippet, str) and snippet.strip():
            return snippet
    return "No more snippets available."


# Build the demo UI: a text box, two buttons and two result panels.
with gr.Blocks(title="Textile Machinery NER Demo") as demo:
    gr.Markdown(
        "\n"
        "# Textile Machinery Entity Recognition Demo\n"
        "This demo selects a random text snippet from a subset of the British Library's books dataset and identifies \"Textile Machinery\" entities using a fine-tuned GLiNER model.\n"
    )

    # Text box pre-filled with a fixed example passage; "Get New Snippet"
    # replaces its contents with a sample from the dataset.
    input_text = gr.Textbox(
        value="The machine is fed by means of an endless apron, the wool entering at the smaller end, so that when most entangled it is subjected to the least motion. This apron is a great improvement on former machines, which were filled by hand, an operation attended with danger, and sometimes resulting in accidents. By the revolutions of the cylinder, the wool is torn, disentangled, and cleaned, and by the gradually increasing centrifugal force, it is impelled forwards towards the large end of the cone, encountering in its way increased motion ; which, however, it is better able to bear by becoming less and less entangled at every revolution. When the wool thus reaches the base of the cone, it is tossed into a chamber, where it is received upon another end- less apron, moving in a direction from the machine instead of towards it. Over this apron is a cylindrical wire cage, which revolves on an axis disposed parallel to the apron, and im- mediately over it is a revolving fan. Both these are covered and protected by sheet iron casings, but communicate with the chamber which receives the wool from the cone. When the whole is at work, the fan, drawing the dust out of the chamber, blows it through a chimney, or pipe, connected with the machine for the purpose. The cage prevents the escape of the wool with the dust, and, by its passage over the apron, it lays down the wool in a continuous fleece. The coarser wools, destined for common cloths, are willied more than once ; for instance, before and after dyeing, and after oiling, and before they are scribbled ; the finer wools do not, however, require this, as the operation of scribbling is a sufficient preparation for carding. In the West of England, the wool is beaten with wooden rollers, by women, after which it is placed in a wire screen, or hurdle, and pulled with the hands, so as to get rid of any burs or pitch, or other dirt which may not have been separ- ated by the willy.",
        label="Text input",
        placeholder="Enter your text here",
        lines=5,
    )

    # Output components: highlighted spans plus the raw entity list.
    output_highlighted = gr.HighlightedText(label="Predicted Entities")
    output_entities = gr.JSON(label="Entities")

    submit_btn = gr.Button("Find Textile Machinery!")
    refresh_btn = gr.Button("Get New Snippet")

    # Refresh button: load a new snippet into the text box.
    refresh_btn.click(fn=get_new_snippet, outputs=input_text)

    # Submit button: run NER on the text box and fill both outputs.
    submit_btn.click(
        fn=ner,
        inputs=[input_text],
        outputs=[output_highlighted, output_entities],
    )

demo.queue()
demo.launch(debug=True, share=True)