import numpy as np
import pandas as pd
import streamlit as st
from bokeh.io import output_file, show
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.plotting import figure, output_notebook, show, save
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import cos_sim
from sklearn.manifold import TSNE
def load_model():
    """Build the Spanish paraphrase sentence-embedding model.

    Returns:
        A ``SentenceTransformer`` created from the
        ``hackathon-pln-es/paraphrase-spanish-distilroberta`` checkpoint.
    """
    # NOTE(review): nothing here caches the result, so every call constructs
    # a new model. If the installed Streamlit version offers a resource
    # cache, wrapping this loader would avoid repeated loads -- confirm.
    checkpoint = 'hackathon-pln-es/paraphrase-spanish-distilroberta'
    return SentenceTransformer(checkpoint)
# Page header: title, model background, usage hint and credits.
st.title("Sentence Embedding for Spanish with Bertin")

# NOTE(review): the Markdown link targets were missing here (each link ended
# at "](" with no URL), which broke rendering. They are restored to the public
# page of each resource -- confirm against the project's model card.
st.write(
    "Sentence embedding for spanish trained according to instructions in "
    "the paper [Making Monolingual Sentence Embeddings Multilingual using "
    "Knowledge Distillation](https://arxiv.org/abs/2004.09813) and the "
    "[documentation](https://www.sbert.net/examples/training/multilingual/README.html) "
    "accompanying its companion python package. We have used the strongest "
    "available pretrained English Bi-Encoder "
    "([paraphrase-mpnet-base-v2](https://huggingface.co/sentence-transformers/paraphrase-mpnet-base-v2)) "
    "as a teacher model, and the pretrained Spanish "
    "[BERTIN](https://huggingface.co/bertin-project/bertin-roberta-base-spanish) "
    "as the student model. Used for Sentence Textual Similarity. Based on "
    "the model hackathon-pln-es/paraphrase-spanish-distilroberta."
)
st.write("Introduce two sentence to see their cosine similarity and a graph showing them in the embedding space.")
st.write("Authors: Anibal Pérez, Emilio Tomás Ariza, Lautaro Gesuelli Pinto y Mauricio Mazuecos.")
# Two free-text inputs, one per sentence to compare.
sent1 = st.text_area('Enter sentence 1')
sent2 = st.text_area('Enter sentence 2')

# The comparison only runs when the user presses the button.
if st.button('Compute similarity'):
    # Whitespace-only input carries no sentence, so treat it as missing.
    if sent1.strip() and sent2.strip():
        model = load_model()
        # Encode both sentences in one call; one embedding per input.
        encodings = model.encode([sent1, sent2])
        # cos_sim yields a 1x1 result for a single pair; read its only entry.
        sim = cos_sim(encodings[0], encodings[1]).item()
        st.text(f'Cosine Similarity: {sim:.4f}')
    else:
        # Report the problem only when an input is actually missing, not
        # after a successful computation.
        st.write('Missing a sentences')