Catherine Breslin committed on
Commit
8e8b7d6
1 Parent(s): c90cf13

Similarity Heatmap Demo

Browse files
Files changed (2) hide show
  1. app.py +47 -0
  2. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import nltk
3
+ from transformers import pipeline
4
+ from sentence_transformers import SentenceTransformer
5
+ from scipy.spatial.distance import cosine
6
+ import numpy as np
7
+ import seaborn as sns
8
+ import matplotlib.pyplot as plt
9
+
10
def plot_heatmap(labels, heatmap, rotation=90):
    """Draw a labelled similarity heatmap and render it in the Streamlit page.

    Args:
        labels: Tick labels; the same sequence is applied to both the x and
            y axes.
        heatmap: 2-D array-like of scores to plot. The colour scale is fixed
            to the range [-1, 1].
        rotation: Rotation, in degrees, applied to the x-axis tick labels.
    """
    sns.set(font_scale=1.2)
    fig, ax = plt.subplots()
    # Draw on the axes we just created instead of relying on pyplot's
    # implicit "current axes" state.
    sns.heatmap(
        heatmap,
        xticklabels=labels,
        yticklabels=labels,
        vmin=-1,
        vmax=1,
        cmap="coolwarm",
        ax=ax,
    )
    ax.set_xticklabels(labels, rotation=rotation)
    ax.set_title("Textual Similarity")

    st.pyplot(fig)
    # pyplot keeps every figure it creates alive until it is closed; release
    # it once rendered so figures do not pile up across calls.
    plt.close(fig)
25
+
26
st.header("Sentence Similarity Demo")
st.markdown("This demo uses the sentence_transformers library to plot sentence similarity between a list of sentences. Change the text below and try for yourself!")

# Streamlit text boxes
text = st.text_area('Enter sentences:', value="The sun is hotter than the moon.\nThe sun is very bright.\nI hear that the universe is very large.\nToday is Tuesday.")

# Model setup
# NOTE(review): this runs at module top level, so the model is constructed
# every time the script executes; consider caching it if reruns are frequent.
model = SentenceTransformer('paraphrase-distilroberta-base-v1')
nltk.download('punkt')

# Run model
if text:
    sentences = nltk.tokenize.sent_tokenize(text)
    if sentences:
        embed = np.asarray(model.encode(sentences), dtype=float)

        # Cosine similarity of every pair at once: scale each embedding to
        # unit length, then a single matrix product yields all dot products.
        unit = embed / np.linalg.norm(embed, axis=1, keepdims=True)
        sim = unit @ unit.T
        plot_heatmap(sentences, sim)
    else:
        # Whitespace-only input is truthy but tokenizes to no sentences;
        # there is nothing to embed or plot.
        st.warning("Please enter at least one sentence.")
46
+
47
+
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ torch
2
+ transformers
3
+ sentence_transformers
4
+ nltk
5
+ scipy
6
+ numpy
7
+ seaborn
8
+ matplotlib