leandrocarneiro committed
Commit
566bba1
1 Parent(s): 99c997b

Upload 7 files

Files changed (4):
  1. app.py +5 -2
  2. constants.py +1 -1
  3. rag.py +21 -14
  4. search_engine.py +12 -7
app.py CHANGED
@@ -19,9 +19,12 @@ def call_generate_news(subject, sites, min_words, max_words):
     if max_words < min_words:
         return 'Erro: Máximo de palavras menor que o mínimo de palavras'
 
-
+    filtered_list_sites = []
     list_sites = sites.split('\n')
-    result_news = main.generate_news(subject, min_words, max_words, list_sites)
+    for item in list_sites:
+        if item:
+            filtered_list_sites.append(item)
+    result_news = main.generate_news(subject, min_words, max_words, filtered_list_sites)
     return result_news
 
 def call_invoke_llm(context, prompt):
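
The filtering added to call_generate_news matters because the multi-line sites input typically ends with a newline (and may contain blank lines), so sites.split('\n') yields empty strings that were previously passed straight to main.generate_news. A minimal sketch of the same guard written as a list comprehension, for illustration only (the sample URLs are placeholders, not part of the commit):

    # Illustration only, not repository code: drop the '' entries that a
    # trailing newline or blank line produces after split('\n').
    sites = "https://www.cnnbrasil.com.br/\n\nhttps://g1.globo.com/\n"
    filtered_list_sites = [item for item in sites.split('\n') if item]
    print(filtered_list_sites)  # ['https://www.cnnbrasil.com.br/', 'https://g1.globo.com/']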
constants.py CHANGED
@@ -5,5 +5,5 @@
 #subject = 'Guerra entre Irã e Paquistão'
 
 #sites = ['https://www.cnnbrasil.com.br/']#, 'https://g1.globo.com/', 'https://www.metropoles.com/']
-num_sites = 1#5
+num_sites = 5
 local_base = 'local_base'
rag.py CHANGED
@@ -44,6 +44,7 @@ def generate_embeddings_and_vectorstore(path):
 
         fc_embeddings = OpenAIEmbeddings(openai_api_key=os.environ['OPENAI_KEY'])
         vectorstore = Chroma.from_documents(docs, fc_embeddings)
+        print('total de docs no vectorstore=',len(vectorstore.get()['documents']))
 
         return vectorstore
     except Exception as e:
@@ -58,6 +59,7 @@ class Rag:
 
         prompt_template = """Your task is to create news to a newspaper based on pieces of texts delimited by <> and a question delimited by <>.
         Do not make up any information, create the news just based on the given information on the pieces of texts delimited by <>.
+        If the information is not enough to create the news, you can use your knowledge to complete the news.
         The news should have a tittle.
         The news should be written in a formal language.
         The news should have between {min_words} and {max_words} words and it should be in portuguese language.
@@ -70,7 +72,7 @@ class Rag:
 
         self.qa = ConversationalRetrievalChain.from_llm(
                     llm=ChatOpenAI(model_name="gpt-3.5-turbo",
-                                   temperature=0.3,
+                                   temperature=1,
                                    openai_api_key=os.environ['OPENAI_KEY'],
                                    max_tokens=int(int(max_words) + (int(max_words) / 2))), #número máximo de tokens para a resposta
                     memory=self.memory,
@@ -80,19 +82,24 @@
                     return_source_documents=True,
                     )
     def generate_text(self, subject):
-        query = f"Elabore uma nova notícia sobre {subject}."
-        result_text = self.qa.invoke({"question": query})
-
-        list_result_sources = []
-        str_result_sources = ''
-        for doc in result_text["source_documents"]:
-            list_result_sources.append(doc.metadata['link'])
-        result_sources = list(set(list_result_sources))
-        for i in range(len(result_sources)):
-            str_result_sources += f'{i + 1}) {result_sources[i]}' + '\n'
-
-        return (result_text["answer"], str_result_sources)
-
+        try:
+            query = f"Elabore uma nova notícia sobre {subject}."
+            result_text = self.qa.invoke({"question": query})
+
+            list_result_sources = []
+            str_result_sources = ''
+            for doc in result_text["source_documents"]:
+                list_result_sources.append(doc.metadata['link'])
+            result_sources = list(set(list_result_sources))
+            for i in range(len(result_sources)):
+                str_result_sources += f'{i + 1}) {result_sources[i]}' + '\n'
+
+            self.vectorstore.delete_collection()
+
+            return (result_text["answer"], str_result_sources)
+        except Exception as e:
+            self.vectorstore.delete_collection()
+            return str(e)
 
 
search_engine.py CHANGED
@@ -12,22 +12,27 @@ import constants
 
 
 def google_search_api(search_term, api_key, cse_id, **kwargs):
-    service = build("customsearch", "v1", developerKey=api_key)
-    res = service.cse().list(q=search_term, cx=cse_id, **kwargs).execute()
-    return res['items']
+    try:
+        service = build("customsearch", "v1", developerKey=api_key)
+        res = service.cse().list(q=search_term, cx=cse_id, **kwargs).execute()
+        return res['items']
+    except Exception as e:
+        return -1
 
 
 def search_google(subject, sites):
     try:
-
         results = []
         for site in sites:
             print(' Buscando notícias no domínio: ' + site)
             query = f"{subject} site:{site}"
             sites_searched = google_search_api(query, os.environ['GOOGLE_KEY'], os.environ['GOOGLE_SEARCH'], num=constants.num_sites)
-            for s in sites_searched:
-                results.append(s['link'])
-            #time.sleep(3)
+            if sites_searched == -1:
+                results.append(site)
+            else:
+                for s in sites_searched:
+                    results.append(s['link'])
+            #time.sleep(3)
         print(' Total de sites encontrados: ' + str(len(results)))
 
         return results
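
google_search_api now converts any failure into a -1 sentinel instead of raising, which covers quota or auth errors and, presumably, the KeyError when a response carries no 'items' for a domain; search_google reacts by appending the bare domain so the downstream scraper still has a URL to fetch. A minimal sketch of that sentinel contract in isolation; the function names are illustrative, not repository code:

    def fetch_links(search_fn, query, site):
        # search_fn mirrors google_search_api after this commit: it returns a
        # list of result dicts on success, or -1 on any failure.
        items = search_fn(query)
        if items == -1:
            return [site]                          # fall back to the domain itself
        return [item['link'] for item in items]

    # Simulated API failure: the fallback keeps the pipeline moving.
    print(fetch_links(lambda q: -1,
                      'Guerra entre Irã e Paquistão site:cnnbrasil.com.br',
                      'https://www.cnnbrasil.com.br/'))   # ['https://www.cnnbrasil.com.br/']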