"""Gradio demo that detects the language of a text with XLM-RoBERTa."""

from typing import Dict, Optional

import gradio as gr
import pandas as pd
import streamlit as st
from transformers import pipeline

# Get transformer model and set up a pipeline
model_ckpt = "papluca/xlm-roberta-base-language-detection"
pipe = pipeline("text-classification", model=model_ckpt)

# Maps the label codes emitted by the model to display names.
labels = {
    "ar": "Arabic",
    "bg": "Bulgarian",
    "de": "German",
    "el": "Modern Greek",
    "en": "English",
    "es": "Spanish",
    "fr": "French",
    "hi": "Hindi",
    "it": "Italian",
    "ja": "Japanese",
    "nl": "Dutch",
    "pl": "Polish",
    "pt": "Portuguese",
    "ru": "Russian",
    "sw": "Swahili",
    "th": "Thai",
    "tr": "Turkish",
    "ur": "Urdu",
    "vi": "Vietnamese",
    "zh": "Chinese",
}


def predict(text: str) -> Optional[Dict[str, float]]:
    """Compute predictions for text.

    Args:
        text: The text whose language should be detected.

    Returns:
        A mapping from display name (or the raw label when it has no entry
        in ``labels``) to its score, or ``None`` when the pipeline returns
        an empty result.
    """
    # Input is truncated to 128 tokens; scores for every label are requested.
    preds = pipe(text, return_all_scores=True, truncation=True, max_length=128)
    if not preds:
        return None
    # preds[0] is iterated as the per-label results for `text`; each entry
    # is read through its "label" and "score" keys.
    return {
        labels.get(p["label"], p["label"]): float(p["score"]) for p in preds[0]
    }


title = "Language detection with XLM-RoBERTa"
description = "Determine the language in which your text is written."
examples = [
    [
        "Financial contracts refer to agreements made in the financial market "
        "to buy or sell financial instruments such as stocks, bonds, forex, "
        "commodities, and indices. These contracts are typically entered into "
        "by financial institutions including banks, insurance companies, "
        "investment firms, and other financial intermediaries. In these "
        "contracts, the buyer assumes certain risks while the seller assumes "
        "others. For example, in a forex contract"
    ],
    [
        "金融契约是指在金融市场上购买或销售金融工具,如股票、债券、外汇、商品和指数等的合同。"
        "这些合同通常由金融机构缔结,包括银行、保险公司"
    ],
    [
        "Finanzverträge beziehen sich auf Vereinbarungen, die auf dem "
        "Finanzmarkt getroffen werden, um Finanzinstrumente wie Aktien, "
        "Anleihen, Devisen, Rohstoffe und Indizes zu kauen oder zu verkaufen. "
        # The example text contains a line break at this point.
        "\n"
        "Diese Verträge werden in der Regel von Finanzinstituten wie Banken, "
        "Versicherungsunternehmen, Investmentfirmen und anderen "
    ],
]
explanation = (
    "Supported languages are (20): arabic (ar), bulgarian (bg), german (de), "
    "modern greek (el), english (en), spanish (es), french (fr), hindi (hi), "
    "italian (it), japanese (ja), dutch (nl), polish (pl), portuguese (pt), "
    "russian (ru), swahili (sw), thai (th), turkish (tr), urdu (ur), "
    "vietnamese (vi), and chinese (zh)."
)

# NOTE(review): gr.inputs / gr.outputs are the legacy Gradio component
# namespaces; newer Gradio releases expose gr.Textbox / gr.Label directly.
# Confirm against the Gradio version this app is deployed with.
app = gr.Interface(
    fn=predict,
    inputs=gr.inputs.Textbox(
        placeholder="What's the text you want to know the language for?",
        label="Text",
        lines=3,
    ),
    outputs=gr.outputs.Label(
        num_top_classes=3,
        label="Your text is written in ",
    ),
    title=title,
    description=description,
    examples=examples,
    article=explanation,
)

app.launch()