Spaces:
Sleeping
Sleeping
Keane Moraes
committed on
Commit
•
359769b
1
Parent(s):
aec1dec
fix for the key error
Browse files
- app.py +13 -6
- summary.py +23 -9
app.py
CHANGED
@@ -88,13 +88,13 @@ def generate_word_embeddings():
|
|
88 |
|
89 |
def generate_text_chunks_lib():
|
90 |
|
|
|
91 |
global title_entry, text_chunks_lib
|
92 |
global keywords
|
93 |
global tldr
|
94 |
global summary
|
95 |
global takeaways
|
96 |
global input_accepted
|
97 |
-
global data_transcription
|
98 |
|
99 |
# For each body of text, create text chunks of a certain token size required for the transformer
|
100 |
text_df = pd.DataFrame.from_dict({"title": [data_transcription["title"]], "text":[data_transcription["text"]]})
|
@@ -191,13 +191,20 @@ with st.sidebar:
|
|
191 |
thread1.join()
|
192 |
thread2.join()
|
193 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
194 |
# Generate the summary
|
195 |
if gen_summary == 'Yes':
|
196 |
-
print("\n\nTITLE ENTRY: ", title_entry)
|
197 |
se = TextSummarizer(title_entry)
|
198 |
text_transcription = data_transcription['text']
|
199 |
with st.spinner("Generating summary and TLDR..."):
|
200 |
-
print("\n\nTEXT_CHNK_SUMMARY\n\n", text_chunks_lib)
|
201 |
summary = se.generate_full_summary(text_chunks_lib)
|
202 |
summary_list = summary.split("\n\n")
|
203 |
tldr = se.generate_short_summary(summary_list)
|
@@ -208,6 +215,9 @@ with st.sidebar:
|
|
208 |
takeaways = kt.generate_key_takeaways(text_chunks_lib)
|
209 |
is_completed_analysis = True
|
210 |
bar.progress(100)
|
|
|
|
|
|
|
211 |
|
212 |
if is_completed_analysis:
|
213 |
st.header("Key Takeaways")
|
@@ -331,9 +341,6 @@ with tab6:
|
|
331 |
print("user input is ", user_input)
|
332 |
print("the folder name at got here 0.5 is ", folder_name)
|
333 |
|
334 |
-
# if 'messages' not in st.session_state:
|
335 |
-
# st.session_state['messages'] = get_initial_message()
|
336 |
-
|
337 |
if user_input:
|
338 |
print("got here 1")
|
339 |
print("the folder name at got here 1.5 is ", folder_name)
|
|
|
88 |
|
89 |
def generate_text_chunks_lib():
|
90 |
|
91 |
+
global data_transcription
|
92 |
global title_entry, text_chunks_lib
|
93 |
global keywords
|
94 |
global tldr
|
95 |
global summary
|
96 |
global takeaways
|
97 |
global input_accepted
|
|
|
98 |
|
99 |
# For each body of text, create text chunks of a certain token size required for the transformer
|
100 |
text_df = pd.DataFrame.from_dict({"title": [data_transcription["title"]], "text":[data_transcription["text"]]})
|
|
|
191 |
thread1.join()
|
192 |
thread2.join()
|
193 |
|
194 |
+
def generate_summary():
|
195 |
+
pass
|
196 |
+
|
197 |
+
def generate_key_takeaways():
|
198 |
+
pass
|
199 |
+
|
200 |
+
threadSum = Thread(target=generate_summary)
|
201 |
+
threadTak = Thread(target=generate_key_takeaways)
|
202 |
+
|
203 |
# Generate the summary
|
204 |
if gen_summary == 'Yes':
|
|
|
205 |
se = TextSummarizer(title_entry)
|
206 |
text_transcription = data_transcription['text']
|
207 |
with st.spinner("Generating summary and TLDR..."):
|
|
|
208 |
summary = se.generate_full_summary(text_chunks_lib)
|
209 |
summary_list = summary.split("\n\n")
|
210 |
tldr = se.generate_short_summary(summary_list)
|
|
|
215 |
takeaways = kt.generate_key_takeaways(text_chunks_lib)
|
216 |
is_completed_analysis = True
|
217 |
bar.progress(100)
|
218 |
+
|
219 |
+
with open(f"{folder_name}/data.json", "w") as f:
|
220 |
+
json.dump(data_transcription, f, indent=4)
|
221 |
|
222 |
if is_completed_analysis:
|
223 |
st.header("Key Takeaways")
|
|
|
341 |
print("user input is ", user_input)
|
342 |
print("the folder name at got here 0.5 is ", folder_name)
|
343 |
|
|
|
|
|
|
|
344 |
if user_input:
|
345 |
print("got here 1")
|
346 |
print("the folder name at got here 1.5 is ", folder_name)
|
summary.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
import models as md
|
2 |
import nltk
|
3 |
-
|
4 |
import openai
|
5 |
import os
|
6 |
|
@@ -39,17 +39,31 @@ class TextSummarizer:
|
|
39 |
|
40 |
def generate_full_summary(self, text_chunks_lib:dict) -> str:
|
41 |
sum_dict = dict()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
for _, key in enumerate(text_chunks_lib):
|
43 |
|
44 |
-
# for key in text_chunks_lib:
|
45 |
summary = []
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
|
54 |
return sum_dict[self.title][0]
|
55 |
|
|
|
1 |
import models as md
|
2 |
import nltk
|
3 |
+
from threading import Thread
|
4 |
import openai
|
5 |
import os
|
6 |
|
|
|
39 |
|
40 |
def generate_full_summary(self, text_chunks_lib: dict) -> str:
    """Summarize every chunk of every entry in *text_chunks_lib* and return
    the stitched-together summary for ``self.title``.

    Chunk summaries are produced concurrently — one worker thread per text
    chunk — because each call to the summarizer model is independent.
    Results are written into pre-sized slots so no locking is needed and
    chunk order is preserved.

    Args:
        text_chunks_lib: mapping of title -> list of text chunks
            (presumably produced by the chunking step in app.py — confirm).

    Returns:
        The full summary for ``self.title``: the per-chunk summaries
        joined with blank lines.

    Raises:
        KeyError: if ``self.title`` is not a key of *text_chunks_lib*.
        RuntimeError: if any chunk summary was not produced (its worker
            thread died before writing a result).
    """
    sum_dict = dict()

    def generate_chunk_summary(results: list, text_chunk: str, i: int) -> None:
        # Worker: summarize one chunk and store it in its ordered slot.
        results[i] = md.summarizer_gen(
            self.summarizer,
            sequence=text_chunk,
            maximum_tokens=500,
            minimum_tokens=100,
        )

    for key in text_chunks_lib:
        chunks = text_chunks_lib[key]
        # Pre-sized slot list: threads write by index, keeping chunk order.
        chunk_summaries = [None] * len(chunks)
        threads = [
            Thread(target=generate_chunk_summary, args=(chunk_summaries, chunk, i))
            for i, chunk in enumerate(chunks)
        ]
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()

        # A None slot means a worker raised before writing its result;
        # fail with a clear message instead of a TypeError inside join.
        if any(s is None for s in chunk_summaries):
            raise RuntimeError(f"summary generation failed for '{key}'")

        sum_dict[key] = ["\n\n".join(chunk_summaries)]

    return sum_dict[self.title][0]
|
69 |
|