leandrocarneiro committed
Commit 566bba1
1 Parent(s): 99c997b
Upload 7 files

Files changed:
- app.py +5 -2
- constants.py +1 -1
- rag.py +21 -14
- search_engine.py +12 -7
app.py
CHANGED
@@ -19,9 +19,12 @@ def call_generate_news(subject, sites, min_words, max_words):
     if max_words < min_words:
         return 'Erro: Máximo de palavras menor que o mínimo de palavras'

+    filtered_list_sites = []
     list_sites = sites.split('\n')
+    for item in list_sites:
+        if item:
+            filtered_list_sites.append(item)
+    result_news = main.generate_news(subject, min_words, max_words, filtered_list_sites)
     return result_news

 def call_invoke_llm(context, prompt):
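The new lines drop empty entries from the multi-line sites textbox before the list reaches main.generate_news, so a trailing newline no longer produces a blank site. A minimal standalone sketch of that filtering (the helper name and sample input are illustrative, not part of the commit):

    def parse_sites(raw_text):
        # Split the textarea content on newlines and keep only non-empty
        # entries, mirroring the filtering added to call_generate_news.
        return [item for item in raw_text.split('\n') if item]

    print(parse_sites('https://www.cnnbrasil.com.br/\nhttps://g1.globo.com/\n'))
    # ['https://www.cnnbrasil.com.br/', 'https://g1.globo.com/']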
constants.py
CHANGED
@@ -5,5 +5,5 @@
 #subject = 'Guerra entre Irã e Paquistão'

 #sites = ['https://www.cnnbrasil.com.br/']#, 'https://g1.globo.com/', 'https://www.metropoles.com/']
-num_sites =
+num_sites = 5
 local_base = 'local_base'
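num_sites is the per-domain result cap that search_engine.py passes as the num= argument of the Custom Search call; the Custom Search JSON API only accepts values from 1 to 10, so 5 is within range. A hypothetical guard (not in the commit) that keeps a configured value legal:

    num_sites = 5  # value set in constants.py
    # Clamp to the 1..10 range accepted by the Custom Search JSON API's num parameter.
    num_results = max(1, min(num_sites, 10))
    print(num_results)  # 5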
rag.py
CHANGED
@@ -44,6 +44,7 @@ def generate_embeddings_and_vectorstore(path):

         fc_embeddings = OpenAIEmbeddings(openai_api_key=os.environ['OPENAI_KEY'])
         vectorstore = Chroma.from_documents(docs, fc_embeddings)
+        print('total de docs no vectorstore=', len(vectorstore.get()['documents']))

         return vectorstore
     except Exception as e:
@@ -58,6 +59,7 @@ class Rag:

         prompt_template = """Your task is to create news to a newspaper based on pieces of texts delimited by <> and a question delimited by <>.
         Do not make up any information, create the news just based on the given information on the pieces of texts delimited by <>.
+        If the information is not enough to create the news, you can use your knowledge to complete the news.
         The news should have a tittle.
         The news should be written in a formal language.
         The news should have between {min_words} and {max_words} words and it should be in portuguese language.
@@ -70,7 +72,7 @@ class Rag:

         self.qa = ConversationalRetrievalChain.from_llm(
             llm=ChatOpenAI(model_name="gpt-3.5-turbo",
-                           temperature=
+                           temperature=1,
                            openai_api_key=os.environ['OPENAI_KEY'],
                            max_tokens=int(int(max_words) + (int(max_words) / 2))), #número máximo de tokens para a resposta
             memory=self.memory,
@@ -80,19 +82,24 @@

             return_source_documents=True,
         )
     def generate_text(self, subject):
+        try:
+            query = f"Elabore uma nova notícia sobre {subject}."
+            result_text = self.qa.invoke({"question": query})
+
+            list_result_sources = []
+            str_result_sources = ''
+            for doc in result_text["source_documents"]:
+                list_result_sources.append(doc.metadata['link'])
+            result_sources = list(set(list_result_sources))
+            for i in range(len(result_sources)):
+                str_result_sources += f'{i + 1}) {result_sources[i]}' + '\n'
+
+            self.vectorstore.delete_collection()
+
+            return (result_text["answer"], str_result_sources)
+        except Exception as e:
+            self.vectorstore.delete_collection()
+            return str(e)
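Besides setting temperature=1 and letting the model fall back on its own knowledge when the retrieved text is thin, the rewritten generate_text gathers the link metadata of every retrieved chunk, de-duplicates it, and renders a numbered source list before dropping the Chroma collection; the max_tokens budget stays at roughly 1.5x max_words. The source-list step can be exercised without any OpenAI or Chroma setup; a standalone sketch with made-up documents (plain dicts stand in for LangChain Document objects, the URLs are illustrative):

    def format_sources(source_documents):
        # Collect the 'link' metadata of each retrieved chunk, drop duplicates
        # (set() does not preserve order, same as in the commit), and number them.
        links = [doc['metadata']['link'] for doc in source_documents]
        unique_links = list(set(links))
        return ''.join(f'{i + 1}) {link}\n' for i, link in enumerate(unique_links))

    docs = [{'metadata': {'link': 'https://g1.globo.com/politica/noticia-a'}},
            {'metadata': {'link': 'https://g1.globo.com/politica/noticia-a'}},
            {'metadata': {'link': 'https://www.cnnbrasil.com.br/noticia-b'}}]
    print(format_sources(docs), end='')
    # 1) https://g1.globo.com/politica/noticia-a
    # 2) https://www.cnnbrasil.com.br/noticia-b   (order may vary)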
search_engine.py
CHANGED
@@ -12,22 +12,27 @@ import constants


 def google_search_api(search_term, api_key, cse_id, **kwargs):
+    try:
+        service = build("customsearch", "v1", developerKey=api_key)
+        res = service.cse().list(q=search_term, cx=cse_id, **kwargs).execute()
+        return res['items']
+    except Exception as e:
+        return -1


 def search_google(subject, sites):
     try:
         results = []
         for site in sites:
             print(' Buscando notícias no domínio: ' + site)
             query = f"{subject} site:{site}"
             sites_searched = google_search_api(query, os.environ['GOOGLE_KEY'], os.environ['GOOGLE_SEARCH'], num=constants.num_sites)
+            if sites_searched == -1:
+                results.append(site)
+            else:
+                for s in sites_searched:
+                    results.append(s['link'])
+            #time.sleep(3)
         print(' Total de sites encontrados: ' + str(len(results)))

         return results
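google_search_api now returns -1 on any failure, and search_google treats that sentinel as "search failed", falling back to the domain's root URL instead of individual article links. A sketch of that control flow with the API call stubbed out (the helper name, stub and URLs are illustrative, not from the repo):

    def collect_links(subject, sites, search_fn):
        # Mirrors the new search_google logic: keep the bare domain when the
        # search fails (-1), otherwise keep every result link the API returned.
        results = []
        for site in sites:
            found = search_fn(f"{subject} site:{site}")
            if found == -1:
                results.append(site)
            else:
                results.extend(item['link'] for item in found)
        return results

    # Stubbed search: the first domain yields one hit, the second simulates a failure.
    fake = {'https://g1.globo.com/': [{'link': 'https://g1.globo.com/noticia-1'}]}

    def search_stub(query):
        return fake.get(query.split('site:')[1], -1)

    print(collect_links('eleições', ['https://g1.globo.com/', 'https://www.cnnbrasil.com.br/'], search_stub))
    # ['https://g1.globo.com/noticia-1', 'https://www.cnnbrasil.com.br/']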