leandrocarneiro committed
Commit
566bba1
1 Parent(s): 99c997b

Upload 7 files

Files changed (4):
  1. app.py +5 -2
  2. constants.py +1 -1
  3. rag.py +21 -14
  4. search_engine.py +12 -7
app.py CHANGED
@@ -19,9 +19,12 @@ def call_generate_news(subject, sites, min_words, max_words):
     if max_words < min_words:
         return 'Erro: Máximo de palavras menor que o mínimo de palavras'
 
-
+    filtered_list_sites = []
     list_sites = sites.split('\n')
-    result_news = main.generate_news(subject, min_words, max_words, list_sites)
+    for item in list_sites:
+        if item:
+            filtered_list_sites.append(item)
+    result_news = main.generate_news(subject, min_words, max_words, filtered_list_sites)
     return result_news
 
 def call_invoke_llm(context, prompt):
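
The filtering added to call_generate_news matters because the multi-line sites input typically ends with a newline (and may contain blank lines), so sites.split('\n') yields empty strings that were previously passed straight to main.generate_news. A minimal sketch of the same guard written as a list comprehension, for illustration only (the sample URLs are placeholders, not part of the commit):

    # Illustration only, not repository code: drop the '' entries that a
    # trailing newline or blank line produces after split('\n').
    sites = "https://www.cnnbrasil.com.br/\n\nhttps://g1.globo.com/\n"
    filtered_list_sites = [item for item in sites.split('\n') if item]
    print(filtered_list_sites)  # ['https://www.cnnbrasil.com.br/', 'https://g1.globo.com/']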
constants.py CHANGED
@@ -5,5 +5,5 @@
 #subject = 'Guerra entre Irã e Paquistão'
 
 #sites = ['https://www.cnnbrasil.com.br/']#, 'https://g1.globo.com/', 'https://www.metropoles.com/']
-num_sites = 1#5
+num_sites = 5
 local_base = 'local_base'
rag.py CHANGED
@@ -44,6 +44,7 @@ def generate_embeddings_and_vectorstore(path):
 
         fc_embeddings = OpenAIEmbeddings(openai_api_key=os.environ['OPENAI_KEY'])
         vectorstore = Chroma.from_documents(docs, fc_embeddings)
+        print('total de docs no vectorstore=',len(vectorstore.get()['documents']))
 
         return vectorstore
     except Exception as e:
@@ -58,6 +59,7 @@ class Rag:
 
         prompt_template = """Your task is to create news to a newspaper based on pieces of texts delimited by <> and a question delimited by <>.
         Do not make up any information, create the news just based on the given information on the pieces of texts delimited by <>.
+        If the information is not enough to create the news, you can use your knowledge to complete the news.
         The news should have a tittle.
         The news should be written in a formal language.
         The news should have between {min_words} and {max_words} words and it should be in portuguese language.
@@ -70,7 +72,7 @@ class Rag:
 
         self.qa = ConversationalRetrievalChain.from_llm(
                     llm=ChatOpenAI(model_name="gpt-3.5-turbo",
-                                   temperature=0.3,
+                                   temperature=1,
                                    openai_api_key=os.environ['OPENAI_KEY'],
                                    max_tokens=int(int(max_words) + (int(max_words) / 2))), #número máximo de tokens para a resposta
                     memory=self.memory,
@@ -80,19 +82,24 @@
                     return_source_documents=True,
                     )
     def generate_text(self, subject):
-        query = f"Elabore uma nova notícia sobre {subject}."
-        result_text = self.qa.invoke({"question": query})
-
-        list_result_sources = []
-        str_result_sources = ''
-        for doc in result_text["source_documents"]:
-            list_result_sources.append(doc.metadata['link'])
-        result_sources = list(set(list_result_sources))
-        for i in range(len(result_sources)):
-            str_result_sources += f'{i + 1}) {result_sources[i]}' + '\n'
-
-        return (result_text["answer"], str_result_sources)
-
+        try:
+            query = f"Elabore uma nova notícia sobre {subject}."
+            result_text = self.qa.invoke({"question": query})
+
+            list_result_sources = []
+            str_result_sources = ''
+            for doc in result_text["source_documents"]:
+                list_result_sources.append(doc.metadata['link'])
+            result_sources = list(set(list_result_sources))
+            for i in range(len(result_sources)):
+                str_result_sources += f'{i + 1}) {result_sources[i]}' + '\n'
+
+            self.vectorstore.delete_collection()
+
+            return (result_text["answer"], str_result_sources)
+        except Exception as e:
+            self.vectorstore.delete_collection()
+            return str(e)
 
 
search_engine.py CHANGED
@@ -12,22 +12,27 @@ import constants
 
 
 def google_search_api(search_term, api_key, cse_id, **kwargs):
-    service = build("customsearch", "v1", developerKey=api_key)
-    res = service.cse().list(q=search_term, cx=cse_id, **kwargs).execute()
-    return res['items']
+    try:
+        service = build("customsearch", "v1", developerKey=api_key)
+        res = service.cse().list(q=search_term, cx=cse_id, **kwargs).execute()
+        return res['items']
+    except Exception as e:
+        return -1
 
 
 def search_google(subject, sites):
     try:
-
         results = []
         for site in sites:
             print(' Buscando notícias no domínio: ' + site)
             query = f"{subject} site:{site}"
             sites_searched = google_search_api(query, os.environ['GOOGLE_KEY'], os.environ['GOOGLE_SEARCH'], num=constants.num_sites)
-            for s in sites_searched:
-                results.append(s['link'])
-            #time.sleep(3)
+            if sites_searched == -1:
+                results.append(site)
+            else:
+                for s in sites_searched:
+                    results.append(s['link'])
+            #time.sleep(3)
         print(' Total de sites encontrados: ' + str(len(results)))
 
         return results
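
google_search_api now converts any failure into a -1 sentinel instead of raising, which covers quota or auth errors and, presumably, the KeyError when a response carries no 'items' for a domain; search_google reacts by appending the bare domain so the downstream scraper still has a URL to fetch. A minimal sketch of that sentinel contract in isolation; the function names are illustrative, not repository code:

    def fetch_links(search_fn, query, site):
        # search_fn mirrors google_search_api after this commit: it returns a
        # list of result dicts on success, or -1 on any failure.
        items = search_fn(query)
        if items == -1:
            return [site]                          # fall back to the domain itself
        return [item['link'] for item in items]

    # Simulated API failure: the fallback keeps the pipeline moving.
    print(fetch_links(lambda q: -1,
                      'Guerra entre Irã e Paquistão site:cnnbrasil.com.br',
                      'https://www.cnnbrasil.com.br/'))   # ['https://www.cnnbrasil.com.br/']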