Spaces:
Running
Running
Hellisotherpeople
committed on
Commit
•
4133681
1
Parent(s):
741ff55
Update app.py
Browse files
app.py
CHANGED
@@ -11,9 +11,22 @@ import streamlit.components.v1 as components
|
|
11 |
|
12 |
st.set_page_config(page_title="DebateKG")
|
13 |
st.title("DebateKG - Automatic Policy Debate Case Creation")
|
|
|
14 |
st.caption("github: https://github.com/Hellisotherpeople/DebateKG")
|
15 |
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
|
19 |
|
@@ -22,7 +35,7 @@ seg = pysbd.Segmenter(language="en", clean=False)
|
|
22 |
|
23 |
|
24 |
embeddings = Embeddings({
|
25 |
-
"path": "
|
26 |
"content": True,
|
27 |
"functions": [
|
28 |
{"name": "graph", "function": "graph.attribute"},
|
@@ -44,4 +57,73 @@ embeddings = Embeddings({
|
|
44 |
embeddings.load("DebateSum_SemanticGraph_mpnet_extract.tar.gz")
|
45 |
graph = embeddings.graph
|
46 |
|
47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
# Page chrome: browser tab title, heading, work-in-progress notice, repo link.
st.set_page_config(page_title="DebateKG")
st.title("DebateKG - Automatic Policy Debate Case Creation")
st.write("WIP, give me a few more days before reviewing!")
st.caption("github: https://github.com/Hellisotherpeople/DebateKG")


# Sidebar settings form. The values collected here are not referenced
# anywhere else in the visible part of this file yet.
form = st.sidebar.form("Main Settings")
form.header("Main Settings")

number_of_paths = form.number_input(
    "Enter the cutoff number of paths for all shortest path search",
    value=4,
)
# NOTE(review): an integer default of 4 for a "similarity value" looks like a
# copy of the setting above -- confirm the intended default and numeric type.
highlight_threshold = form.number_input(
    "Enter the minimum similarity value needed to highlight",
    value=4,
)

# Four display toggles, created in the order they appear in the sidebar.
show_extract, show_abstract, show_full_doc, show_citation = (
    form.checkbox(label, value=False)
    for label in ("Show extracts", "Show abstract", "Show full doc", "Show citation")
)

# Free-text filters; both default to the literal string "Full-Document".
rerank_word, rerank_topic = (
    form.text_area(label, value="Full-Document")
    for label in ("Enter the word", "Enter the topic")
)

form.form_submit_button("Submit")
|
30 |
|
31 |
|
32 |
|
|
|
35 |
|
36 |
|
37 |
embeddings = Embeddings({
|
38 |
+
"path": "sentence-transformers/all-mpnet-base-v2",
|
39 |
"content": True,
|
40 |
"functions": [
|
41 |
{"name": "graph", "function": "graph.attribute"},
|
|
|
57 |
embeddings.load("DebateSum_SemanticGraph_mpnet_extract.tar.gz")
|
58 |
graph = embeddings.graph
|
59 |
|
60 |
+
def david_distance(source, target, attrs):
    """Compute the traversal cost of one graph edge from its ``weight``.

    Used as the edge-weight callback for the shortest-path search in
    ``david_showpath``. The cost is ``1 - weight`` clamped at zero; any cost
    below 0.15 is replaced by the maximum cost of 1.0.
    (Presumably ``weight`` is a similarity score, so this penalises
    near-duplicate neighbours -- confirm against how the graph was built.)

    Args:
        source: Edge start node (unused; required by the callback signature).
        target: Edge end node (unused; required by the callback signature).
        attrs: Edge attribute mapping; must contain a numeric ``"weight"``.

    Returns:
        The edge cost as a float.
    """
    cost = 1.0 - attrs["weight"]
    if cost < 0.0:
        cost = 0.0

    if cost >= 0.15:
        return cost
    return 1.00
|
63 |
+
|
64 |
+
def david_showpath(source, target, the_graph):
    """Return the lowest-cost node path from ``source`` to ``target``.

    Edge costs come from ``david_distance``. The result is whatever
    ``nx.shortest_path`` returns for a single source/target pair
    (``nx`` is imported outside the visible part of the file; presumably
    networkx, in which case this is a list of node ids).
    """
    return nx.shortest_path(
        the_graph,
        source=source,
        target=target,
        weight=david_distance,
    )
|
66 |
+
|
67 |
+
|
68 |
+
|
69 |
+
import string
|
70 |
+
|
71 |
+
def highlight(index, result, threshold=0.01):
    """Render one numbered path element as HTML with important tokens marked.

    Args:
        index: Number printed in front of the element (``"{index}. "``).
        result: Mapping with a ``"tokens"`` entry: an iterable of
            ``(token, score)`` pairs.
        threshold: Tokens whose score is strictly greater than this value get
            a yellow background. Defaults to 0.01, the previously hard-coded
            cut-off, so existing callers are unaffected.

    Returns:
        ``"{index}. "`` followed by every token, each followed by one space;
        highlighted tokens are wrapped in a ``<span>`` with a background color.
    """
    # NOTE(review): tokens are inserted into the markup unescaped, so a token
    # containing "<" or "&" ends up as raw HTML -- confirm that is acceptable.
    rendered = (
        f"<span style='background-color: #fff59d'>{token}</span> "
        if score > threshold
        else f"{token} "
        for token, score in result["tokens"]
    )
    return f"{index}. " + "".join(rendered)
|
79 |
+
|
80 |
+
|
81 |
+
|
82 |
+
def showpath_any(list_of_arguments, strip_punctuation = True, the_graph=graph.backend):
    """Render a chain of shortest paths through the graph as an HTML component.

    For every consecutive pair in ``list_of_arguments`` the lowest-cost path is
    found with ``david_showpath``; the legs are joined into one node sequence,
    each node's ``"text"`` attribute is looked up, and every element after the
    first is rendered through ``highlight`` using ``embeddings.explain``
    against the preceding element.

    Args:
        list_of_arguments: Sequence of graph node ids to visit in order. With
            fewer than two ids no path is searched and an empty component is
            rendered.
        strip_punctuation: When true, ASCII punctuation is removed from each
            text before it is embedded in the evidence-id lookup query.
        the_graph: Graph object handed to ``david_showpath``. The default is
            bound once, when this function is defined.

    Returns:
        The value returned by ``components.html`` for the joined sections.
    """
    list_of_paths = []
    for x, y in zip(list_of_arguments, list_of_arguments[1:]):
        a_path = david_showpath(x, y, the_graph)
        # Every leg after the first begins on the node the previous leg ended
        # on; drop that repeated node so it is not rendered twice.
        if list_of_paths and a_path and list_of_paths[-1] == a_path[0]:
            a_path = a_path[1:]
        list_of_paths.extend(a_path)

    path = [graph.attribute(p, "text") for p in list_of_paths]

    # Build the punctuation-removal table once instead of once per text.
    punctuation_table = str.maketrans("", "", string.punctuation)

    list_of_evidence_ids = []
    for text in path:
        if strip_punctuation:
            text = text.translate(punctuation_table)
        # NOTE(review): the text is spliced into the SQL string as-is; with
        # strip_punctuation=False a single quote in the text breaks the query.
        hits = embeddings.search(f"select id from txtai where similar('{text}') limit 1")
        # An empty result is recorded as None rather than raising IndexError.
        list_of_evidence_ids.append(int(hits[0]['id']) if hits else None)
    print(list_of_evidence_ids)

    # TODO: the evidence ids are intended for extra per-element sections
    # (dataset "Abstract", "Citation" and "Full-Document" columns indexed by
    # list_of_evidence_ids); none of those are emitted yet.
    sections = []
    last = len(path) - 1
    for x, p in enumerate(path):
        if x == 0:
            # The start node has no predecessor to explain against: plain text.
            sections.append(f"{x + 1}. {p}")

        if x < last:
            # Explain and highlight the next path element relative to this one.
            results = embeddings.explain(p, [path[x + 1]], limit=1)[0]
            sections.append(highlight(x + 2, results))

    return components.html("<br/><br/>".join(sections), scrolling = True, width = 800, height = 1000)
|
115 |
+
|
116 |
+
def question(text, rerank_word="", rerank_topic="", limit=100):
    """Run a similarity search over the index, filtered by word and topic.

    Args:
        text: Query text placed inside the ``similar(...)`` clause.
        rerank_word: Substring that the ``text`` column must contain
            (``like '%...%'``); the empty default matches everything.
        rerank_topic: Substring that the ``topic`` column must contain.
        limit: Maximum number of rows requested.

    Returns:
        Whatever ``embeddings.search`` returns for the query, selecting the
        ``id``, ``text``, ``topic``, ``evidence_id`` and ``score`` columns.
    """
    # NOTE(review): every argument is interpolated straight into the SQL text;
    # a single quote in any of them breaks (or alters) the query. Consider bind
    # parameters if the installed txtai version supports them.
    sql = (
        "select id, text, topic, evidence_id, score from txtai "
        f"where similar('{text}') and text like '%{rerank_word}%' "
        f"and topic like '%{rerank_topic}%' limit {limit}"
    )
    return embeddings.search(sql)
|
118 |
+
|
119 |
+
|
120 |
+
|
121 |
+
# Main-area form for querying the index. Only the prompt text and the submit
# button exist so far; the submit flag is not read in the visible code.
query_form = st.form("Query the Index:")
query_form.write("Write a SQL query")
query_form_submitted = query_form.form_submit_button("Click me to get ")


#showpath_any([3, 12, 15])

# Collapsed expander showing the rows of a fixed sample query against the index.
with st.expander("mine", expanded=False):
    st.write(
        embeddings.search(
            "select * from txtai where similar('you') and text like '%the%' limit 10"
        )
    )
|