from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer
import gradio as gr
from spacy import displacy

# Identifier handed to both `from_pretrained` calls, so the tokenizer and the
# model weights are always loaded from the same place.
_MODEL_ID = "lirondos/anglicisms-spanish-mbert"

tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID)
model = AutoModelForTokenClassification.from_pretrained(_MODEL_ID)

# Token-classification ("ner") pipeline used by the rest of the module.
nlp = pipeline(task="ner", tokenizer=tokenizer, model=model)

# Sample of the dict layout ("text" / "ents" / "title") that is passed to
# displaCy with manual=True elsewhere in this file.
# NOTE(review): the name is spelled "diplacy" (missing "s") and nothing in the
# visible source reads it; the name is kept as-is so external references, if
# any, keep working.
diplacy_dict_template = dict(
    text="But Google is starting from behind.",
    ents=[dict(start=4, end=10, label="ORG")],
    title=None,
)


def infer(input_text: str) -> str:
    """Render the predictions for ``input_text`` as displaCy entity markup.

    The text is run through the module-level ``nlp`` pipeline; every
    prediction is converted into a displaCy "manual" entity span and the
    result is rendered with displaCy's ``ent`` style.

    Args:
        input_text: The text to analyse.

    Returns:
        The markup string produced by ``displacy.render`` (rendered with
        ``page=True``).
    """
    # Map each prediction's "start"/"end"/"entity" fields onto the
    # "start"/"end"/"label" keys used for displaCy's manual entity input.
    displacy_ents = [
        {
            "start": borrowing["start"],
            "end": borrowing["end"],
            "label": borrowing["entity"],
        }
        for borrowing in nlp(input_text)
    ]
    displacy_doc = {"text": input_text, "ents": displacy_ents, "title": None}

    # jupyter=False disables displaCy's notebook auto-detection. Without it,
    # running this demo inside Jupyter/Colab makes render() display the markup
    # itself and return None, leaving nothing to hand back to the caller.
    return displacy.render(
        displacy_doc, style="ent", page=True, manual=True, jupyter=False
    )


# Web UI: a single text box whose content is passed to `infer`, with the
# returned markup shown in an HTML component.
demo = gr.Interface(
    fn=infer,
    inputs=gr.Text(),
    outputs=gr.HTML(),
    examples=["Buscamos data scientist para proyecto de machine learning."],
    title="Borrowing Detection Español",
)

# Start serving the interface as soon as the module is executed.
demo.launch()