Spaces:
Sleeping
Sleeping
# Imports grouped per PEP 8: stdlib, then third-party, then project-local.
import os
from threading import Thread

import nltk
import openai

import models as md

# Sentence-tokenizer data required by the downstream summarization pipeline.
nltk.download("punkt")
class TextSummarizer:
    """Produce short and full summaries of a video transcript.

    Combines a local abstractive summarization model (loaded via the
    project's ``models`` module) for per-chunk summaries with the OpenAI
    chat API for two-sentence condensations.
    """

    def __init__(self, title):
        # Title is used as the lookup key when returning the final summary
        # from generate_full_summary.
        self.title = title
        self.model = "gpt-3.5-turbo"
        # Local summarization model handle (project-specific loader).
        self.summarizer = md.load_summary_model()
        openai.api_key = os.getenv("OPENAI_API_KEY")

    def generate_short_summary(self, summary_chunks: list) -> list:
        """Condense each pre-summarized chunk into at most two sentences.

        Args:
            summary_chunks: sequence of key-point summary strings, one per
                chunk. (Was annotated ``dict``, but each element is passed
                directly as a chat message, so it must be a string.)

        Returns:
            List of short summaries, one per input chunk, in input order.
        """
        PROMPT = """
You are a helpful assistant that summarizes youtube videos.
Someone has already summarized the video to key points.
Summarize the key points in at most two sentences that capture the essence of the passage.
"""
        final_summary = []
        for summary_chunk in summary_chunks:
            response = openai.ChatCompletion.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": PROMPT},
                    {"role": "user", "content": summary_chunk},
                ],
            )
            final_summary.append(response["choices"][0]["message"]["content"])
        return final_summary

    def generate_full_summary(self, text_chunks_lib: dict) -> str:
        """Summarize every text chunk and return the summary for self.title.

        Args:
            text_chunks_lib: mapping of title -> list of text chunks.

        Returns:
            The chunk summaries for ``self.title`` joined by blank lines.

        Raises:
            KeyError: if ``self.title`` is not a key of ``text_chunks_lib``.
        """
        sum_dict = dict()

        def summarize_chunk(text_chunk: str, i: int, out: list) -> None:
            # Write into a pre-sized shared slot so results keep input order
            # regardless of thread completion order. The output list is an
            # explicit argument rather than a closure over a rebound local.
            out[i] = md.summarizer_gen(
                self.summarizer,
                sequence=text_chunk,
                maximum_tokens=500,
                minimum_tokens=100,
            )

        for key in text_chunks_lib:
            chunks = text_chunks_lib[key]
            # Summarize the chunks in parallel (I/O / native work releases
            # the GIL in the underlying model call).
            chunk_summaries = [None] * len(chunks)
            threads = [
                Thread(target=summarize_chunk, args=(chunk, i, chunk_summaries))
                for i, chunk in enumerate(chunks)
            ]
            for thread in threads:
                thread.start()
            for thread in threads:
                thread.join()
            sum_dict[key] = ["\n\n".join(chunk_summaries)]
        # NOTE(review): raises KeyError when self.title is missing from the
        # input mapping — preserved from the original; confirm callers always
        # include the title as a key.
        return sum_dict[self.title][0]