# NOTE(review): "Spaces: / Running / Running" was residual HuggingFace Spaces
# page-header text picked up by extraction — preserved here as a comment so the
# file remains valid Python.
# Standard library
import json

# Third-party
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import JSONFormatter

# Local modules
import ingest
import run_localGPT
import utils
def audio_to_transcript(video_id):
    """Fetch the YouTube transcript for *video_id* and persist it for later stages.

    Side effects:
        - Writes the raw transcript entries to ``transcript.json``.
        - Writes the joined plain text to ``SOURCE_DOCUMENTS/transcript.txt``.
        - Runs ``utils.calculate_ends`` / ``utils.create_chunks`` to produce
          ``transcript_end.json`` and ``chunks.json``.

    Args:
        video_id: YouTube video identifier accepted by
            ``YouTubeTranscriptApi.get_transcript``.

    Returns:
        The full transcript as a single space-joined string.
    """
    sub = YouTubeTranscriptApi.get_transcript(video_id)
    # Persist the raw entry list; the original also built a JSONFormatter
    # string that was never used — that dead work is removed.
    with open("transcript.json", "w") as outfile:
        json.dump(sub, outfile)
    # Join once (the original joined the same list twice into two variables).
    transcript = ' '.join(entry['text'] for entry in sub)
    with open('SOURCE_DOCUMENTS/transcript.txt', 'w') as outfile:
        outfile.write(transcript)
    utils.calculate_ends('transcript.json', 'transcript_end.json')
    utils.create_chunks('transcript_end.json', 'chunks.json')
    return transcript
def start_training():
    """Run the ingestion pipeline and return its status.

    Thin wrapper around ``ingest.main()`` so callers don't depend on the
    ingest module directly.
    """
    return ingest.main()
def replace_substring_and_following(input_string, substring):
    """Truncate *input_string* at the first occurrence of *substring*.

    Everything from the first occurrence onward (inclusive) is dropped;
    if *substring* is absent the string is returned unchanged.
    """
    cut_at = input_string.find(substring)
    return input_string if cut_at == -1 else input_string[:cut_at]
def ask_question(strQuestion):
    """Answer *strQuestion* with the local model and strip boilerplate.

    Runs ``run_localGPT.main`` on CPU, then cuts the answer off at the first
    "Unhelpful Answer" marker the model sometimes appends.
    """
    raw_answer = run_localGPT.main(device_type='cpu', strQuery=strQuestion)
    return replace_substring_and_following(raw_answer, "Unhelpful Answer")
def summarize(model_id="TheBloke/Llama-2-7B-Chat-GGML",
              model_basename="llama-2-7b-chat.ggmlv3.q4_0.bin",
              transcript_path="SOURCE_DOCUMENTS/transcript.txt",
              device_type='cpu'):
    """Summarize the saved transcript with a local GGML Llama-2 model.

    Defaults reproduce the original hard-coded behavior; the model, input
    file, and device are now parameters so callers can override them.

    Args:
        model_id: HuggingFace repo id of the GGML model to load.
        model_basename: Model weights file within that repo.
        transcript_path: Plain-text transcript file to summarize
            (produced by ``audio_to_transcript``).
        device_type: Device passed to ``run_localGPT.load_model``.

    Returns:
        The map-reduce summary string produced by the chain.
    """
    # Imports kept function-local: langchain is heavy and only needed here.
    # (Unused MapReduceChain / PromptTemplate imports from the original
    # were dropped.)
    from langchain.text_splitter import CharacterTextSplitter
    from langchain.docstore.document import Document
    from langchain.chains.summarize import load_summarize_chain

    llm = run_localGPT.load_model(device_type=device_type,
                                  model_id=model_id,
                                  model_basename=model_basename)
    with open(transcript_path) as f:
        file_content = f.read()
    # Split into chunks and wrap each as a Document for the chain.
    texts = CharacterTextSplitter().split_text(file_content)
    docs = [Document(page_content=t) for t in texts]
    chain = load_summarize_chain(llm, chain_type="map_reduce")
    return chain.run(docs)