Summarization improved
Browse files- App/Chat/ChatRoutes.py +4 -7
- App/Chat/utils/{PalmAPI.py → Dev/PalmAPI.py} +43 -6
- App/Chat/utils/RAG.py +32 -0
- App/Chat/utils/Summarize.py +97 -0
App/Chat/ChatRoutes.py
CHANGED
@@ -5,7 +5,8 @@ from App.Users.Schemas import UserSchema
|
|
5 |
from App.Transcription.Model import Transcriptions
|
6 |
from App.Transcription.Schemas import *
|
7 |
from App import bot
|
8 |
-
from .utils.
|
|
|
9 |
import aiohttp
|
10 |
import os
|
11 |
|
@@ -39,10 +40,6 @@ async def generate_message( task_id: str,
|
|
39 |
text =''
|
40 |
for item in result.content:
|
41 |
text+=item['text']
|
42 |
-
|
43 |
-
|
44 |
-
for doc in docs:
|
45 |
-
summary=await summarization(doc.page_content)
|
46 |
-
summaries.append(summary)
|
47 |
-
return summaries
|
48 |
|
|
|
5 |
from App.Transcription.Model import Transcriptions
|
6 |
from App.Transcription.Schemas import *
|
7 |
from App import bot
|
8 |
+
from .utils.Summarize import Summarizer
|
9 |
+
|
10 |
import aiohttp
|
11 |
import os
|
12 |
|
|
|
40 |
text =''
|
41 |
for item in result.content:
|
42 |
text+=item['text']
|
43 |
+
summary=await Summarizer(text)
|
44 |
+
return [summary]
|
|
|
|
|
|
|
|
|
45 |
|
App/Chat/utils/{PalmAPI.py → Dev/PalmAPI.py}
RENAMED
@@ -1,15 +1,52 @@
|
|
1 |
import aiohttp
|
2 |
import asyncio
|
3 |
import google.generativeai as palm
|
|
|
|
|
|
|
|
|
4 |
import os
|
5 |
PALM_API = ""
|
6 |
API_KEY=os.environ.get("PALM_API",PALM_API)
|
7 |
palm.configure(api_key=API_KEY)
|
8 |
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
def count_tokens(text):
|
15 |
return palm.count_message_tokens(prompt=text)['token_count']
|
@@ -108,5 +145,5 @@ Yo, Mabu, you really the only independent artist putting up numbers right now, b
|
|
108 |
|
109 |
|
110 |
|
111 |
-
if __name__ == '__main__':
|
112 |
-
|
|
|
1 |
import aiohttp
|
2 |
import asyncio
|
3 |
import google.generativeai as palm
|
4 |
+
from langchain.llms import GooglePalm
|
5 |
+
from langchain.chains.summarize import load_summarize_chain
|
6 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
7 |
+
from langchain import PromptTemplate
|
8 |
import os
|
9 |
PALM_API = ""
|
10 |
API_KEY=os.environ.get("PALM_API",PALM_API)
|
11 |
palm.configure(api_key=API_KEY)
|
12 |
|
13 |
+
llm = GooglePalm(google_api_key=API_KEY, safety_settings= [
|
14 |
+
{"category": "HARM_CATEGORY_DEROGATORY", "threshold": 4},
|
15 |
+
{"category": "HARM_CATEGORY_TOXICITY", "threshold": 4},
|
16 |
+
{"category": "HARM_CATEGORY_VIOLENCE", "threshold": 4},
|
17 |
+
{"category": "HARM_CATEGORY_SEXUAL", "threshold": 4},
|
18 |
+
{"category": "HARM_CATEGORY_MEDICAL", "threshold": 4},
|
19 |
+
{"category": "HARM_CATEGORY_DANGEROUS", "threshold": 4},
|
20 |
+
],)
|
21 |
+
text_splitter = RecursiveCharacterTextSplitter(separators=["\n\n", "\n"], chunk_size=10000, chunk_overlap=500)
|
22 |
+
summary_chain = load_summarize_chain(llm=llm, chain_type='map_reduce',
|
23 |
+
# verbose=True # Set verbose=True if you want to see the prompts being used
|
24 |
+
)
|
25 |
+
essay= ''' TFC Mamma Ron Subway Galito Urban Heart Kootie Java Square In this video, I'm going to try every single fast food chain in Irobi Kenya and I'm going to rate them on a scale of terrible, bad, mid, good and for the incredible ones, go zest! I've broken them up into categories, so pizza category, burger category, chicken, general fast food and breakfast category and I'm starting with Pizza Hut To keep this fair across all restaurants, I'm ordering the cheapest possible meal on the menu or as close to my budget of 500 Kenya shillings and for that price in Pizza Hut Okay, so this is the mine meat lovers pizza This is going to be my first tasting of Pizza Hut in Irobi Kenya I haven't washed my hands, no one has to know that Okay, I could already feel how chunky this pizza is Maybe dip that in this barbecue sauce Mmm Okay, '''
|
26 |
+
docs = text_splitter.create_documents([essay])
|
27 |
+
# print(docs[0].page_content)
|
28 |
+
map_prompt = """
|
29 |
+
Write a concise summary of the following:
|
30 |
+
"{text}"
|
31 |
+
CONCISE SUMMARY:
|
32 |
+
"""
|
33 |
+
combine_prompt = """
|
34 |
+
Write a concise summary of the following text delimited by triple backquotes.
|
35 |
+
Return your response in bullet points which covers the key points of the text.
|
36 |
+
```{text}```
|
37 |
+
BULLET POINT SUMMARY:
|
38 |
+
"""
|
39 |
+
combine_prompt_template = PromptTemplate(template=combine_prompt, input_variables=["text"])
|
40 |
+
map_prompt_template = PromptTemplate(template=map_prompt, input_variables=["text"])
|
41 |
+
|
42 |
+
summary_chain = load_summarize_chain(llm=llm,
|
43 |
+
chain_type='map_reduce',
|
44 |
+
map_prompt=map_prompt_template,
|
45 |
+
combine_prompt=combine_prompt_template,
|
46 |
+
verbose=True
|
47 |
+
)
|
48 |
+
output = summary_chain.run(docs)
|
49 |
+
print(output)
|
50 |
|
51 |
def count_tokens(text):
|
52 |
return palm.count_message_tokens(prompt=text)['token_count']
|
|
|
145 |
|
146 |
|
147 |
|
148 |
+
# if __name__ == '__main__':
|
149 |
+
# asyncio.run(main=main())
|
App/Chat/utils/RAG.py
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import aiohttp
|
2 |
+
import asyncio,pprint
|
3 |
+
import google.generativeai as palm
|
4 |
+
from langchain.chains.question_answering import load_qa_chain
|
5 |
+
from langchain.llms import GooglePalm
|
6 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
7 |
+
from langchain import PromptTemplate
|
8 |
+
import os
|
9 |
+
PALM_API = ''
|
10 |
+
API_KEY=os.environ.get("PALM_API",PALM_API)
|
11 |
+
palm.configure(api_key=API_KEY)
|
12 |
+
|
13 |
+
|
14 |
+
def count_tokens(text):
|
15 |
+
return palm.count_message_tokens(prompt=text)['token_count']
|
16 |
+
llm = GooglePalm(
|
17 |
+
google_api_key=API_KEY, **{ "safety_settings": [
|
18 |
+
{"category": "HARM_CATEGORY_DEROGATORY", "threshold": 4},
|
19 |
+
{"category": "HARM_CATEGORY_TOXICITY", "threshold": 4},
|
20 |
+
{"category": "HARM_CATEGORY_VIOLENCE", "threshold": 4},
|
21 |
+
{"category": "HARM_CATEGORY_SEXUAL", "threshold": 4},
|
22 |
+
{"category": "HARM_CATEGORY_MEDICAL", "threshold": 4},
|
23 |
+
{"category": "HARM_CATEGORY_DANGEROUS", "threshold": 4},
|
24 |
+
]})
|
25 |
+
text_splitter = RecursiveCharacterTextSplitter(separators=["\n\n", "\n","."], chunk_size=40_000, chunk_overlap=500)
|
26 |
+
with open('./sample.txt', 'r') as file:
|
27 |
+
essay = file.read()
|
28 |
+
|
29 |
+
docs = text_splitter.create_documents([essay])
|
30 |
+
for doc in docs:
|
31 |
+
print(count_tokens(doc.page_content))
|
32 |
+
|
App/Chat/utils/Summarize.py
ADDED
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import aiohttp
|
2 |
+
import asyncio,pprint
|
3 |
+
import google.generativeai as palm
|
4 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
5 |
+
from langchain import PromptTemplate
|
6 |
+
import os
|
7 |
+
PALM_API = ''
|
8 |
+
API_KEY=os.environ.get("PALM_API",PALM_API)
|
9 |
+
palm.configure(api_key=API_KEY)
|
10 |
+
|
11 |
+
|
12 |
+
|
13 |
+
|
14 |
+
text_splitter = RecursiveCharacterTextSplitter(separators=["\n\n", "\n","."], chunk_size=40_000, chunk_overlap=500)
|
15 |
+
|
16 |
+
|
17 |
+
map_prompt = """
|
18 |
+
Write a verbose summary like a masters student of the following:
|
19 |
+
"{text}"
|
20 |
+
CONCISE SUMMARY:
|
21 |
+
"""
|
22 |
+
|
23 |
+
|
24 |
+
combine_prompt = """
|
25 |
+
Write a concise summary of the following text delimited by triple backquotes.
|
26 |
+
Return your response in a detailed verbose paragraph which covers the text. Make it as insightful to the reader as possible, write like a masters student.
|
27 |
+
|
28 |
+
```{text}```
|
29 |
+
|
30 |
+
SUMMARY:
|
31 |
+
"""
|
32 |
+
def count_tokens(text):
|
33 |
+
return palm.count_message_tokens(prompt=text)['token_count']
|
34 |
+
|
35 |
+
|
36 |
+
async def PalmTextModel(text,candidates=1):
|
37 |
+
url = f"https://generativelanguage.googleapis.com/v1beta2/models/text-bison-001:generateText?key={API_KEY}"
|
38 |
+
|
39 |
+
headers = {
|
40 |
+
"Content-Type": "application/json",
|
41 |
+
}
|
42 |
+
|
43 |
+
data = {
|
44 |
+
"prompt": {
|
45 |
+
"text": text
|
46 |
+
},
|
47 |
+
"temperature": 0.95,
|
48 |
+
"top_k": 100,
|
49 |
+
"top_p": 0.95,
|
50 |
+
"candidate_count": candidates,
|
51 |
+
"max_output_tokens": 1024,
|
52 |
+
"stop_sequences": ["</output>"],
|
53 |
+
"safety_settings": [
|
54 |
+
{"category": "HARM_CATEGORY_DEROGATORY", "threshold": 4},
|
55 |
+
{"category": "HARM_CATEGORY_TOXICITY", "threshold": 4},
|
56 |
+
{"category": "HARM_CATEGORY_VIOLENCE", "threshold": 4},
|
57 |
+
{"category": "HARM_CATEGORY_SEXUAL", "threshold": 4},
|
58 |
+
{"category": "HARM_CATEGORY_MEDICAL", "threshold": 4},
|
59 |
+
{"category": "HARM_CATEGORY_DANGEROUS", "threshold": 4},
|
60 |
+
],
|
61 |
+
}
|
62 |
+
|
63 |
+
|
64 |
+
async with aiohttp.ClientSession() as session:
|
65 |
+
async with session.post(url, json=data, headers=headers) as response:
|
66 |
+
if response.status == 200:
|
67 |
+
result = await response.json()
|
68 |
+
# print(result)
|
69 |
+
if candidates>1:
|
70 |
+
temp = [candidate["output"] for candidate in result["candidates"]]
|
71 |
+
return temp
|
72 |
+
temp = result["candidates"][0]["output"]
|
73 |
+
return temp
|
74 |
+
else:
|
75 |
+
print(f"Error: {response.status}\n{await response.text()}")
|
76 |
+
|
77 |
+
|
78 |
+
async def Summarizer(essay):
|
79 |
+
|
80 |
+
docs = text_splitter.create_documents([essay])
|
81 |
+
|
82 |
+
#for 1 large document
|
83 |
+
if len(docs) == 1:
|
84 |
+
tasks = [PalmTextModel(combine_prompt.format(text=doc.page_content)) for doc in docs]
|
85 |
+
# Gather and execute the tasks concurrently
|
86 |
+
responses = await asyncio.gather(*tasks)
|
87 |
+
ans=" ".join(responses)
|
88 |
+
return ans
|
89 |
+
|
90 |
+
tasks = [PalmTextModel(map_prompt.format(text=doc.page_content)) for doc in docs]
|
91 |
+
# Gather and execute the tasks concurrently
|
92 |
+
responses = await asyncio.gather(*tasks)
|
93 |
+
main=" ".join(responses)
|
94 |
+
ans=await PalmTextModel(combine_prompt.format(text=main),candidates=1)
|
95 |
+
return ans
|
96 |
+
|
97 |
+
|