sohoso committed
Commit b1c04f8
Parent: 0df7c0e

Update app.py

Files changed (1)
app.py +53 -33
app.py CHANGED
@@ -1,4 +1,11 @@
-import time, os, multiprocessing, torch, requests, asyncio, json, aiohttp
+import time
+import os
+import multiprocessing
+import torch
+import requests
+import asyncio
+import json
+import aiohttp
 from minivectordb.embedding_model import EmbeddingModel
 from minivectordb.vector_database import VectorDatabase
 from text_util_en_pt.cleaner import structurize_text, detect_language, Language
@@ -24,17 +31,17 @@ def index_and_search(query, text):
 
     # Indexing
     vector_db = VectorDatabase()
-    sentences = [ s['sentence'] for s in structurize_text(text)]
+    sentences = [s['sentence'] for s in structurize_text(text)]
 
     for idx, sentence in enumerate(sentences):
        sentence_embedding = model.extract_embeddings(sentence)
        vector_db.store_embedding(idx + 1, sentence_embedding, {'sentence': sentence})
-    
+
    embedding_time = time.time() - start
 
    # Retrieval
    start = time.time()
-    search_results = vector_db.find_most_similar(query_embedding, k = 30)
+    search_results = vector_db.find_most_similar(query_embedding, k=30)
    retrieval_time = time.time() - start
    return '\n'.join([s['sentence'] for s in search_results[2]]), embedding_time, retrieval_time
 
@@ -45,21 +52,30 @@ def generate_search_terms(message, lang):
     prompt = f"From the following text, generate some search terms: \"{message}\"\nYour answer should be just the most appropriate search term, and nothing else."
 
     url = "https://openrouter.ai/api/v1/chat/completions"
-    headers = { "Content-Type": "application/json",
-                "Authorization": f"Bearer {openrouter_key}" }
-    body = { "stream": False,
-             "models": [
-                 "mistralai/mistral-7b-instruct:free",
-                 "openchat/openchat-7b:free"
-             ],
-             "route": "fallback",
-             "max_tokens": 1024,
-             "messages": [
-                 {"role": "user", "content": prompt}
-             ] }
-
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {openrouter_key}"
+    }
+    body = {
+        "stream": False,
+        "models": [
+            "mistralai/mistral-7b-instruct:free",
+            "openchat/openchat-7b:free"
+        ],
+        "route": "fallback",
+        "max_tokens": 1024,
+        "messages": [
+            {"role": "user", "content": prompt}
+        ]
+    }
+
     response = requests.post(url, headers=headers, json=body)
-    return response.json()['choices'][0]['message']['content']
+    response_json = response.json()
+    try:
+        return response_json['choices'][0]['message']['content']
+    except KeyError:
+        print(f"Error: 'choices' key not found in the response. Response: {response_json}")
+        return None
 
 async def predict(message, history):
     full_response = ""
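
With the new try/except, generate_search_terms returns None whenever the OpenRouter reply has no 'choices' key instead of raising. A hypothetical caller (not shown in this diff) should therefore be prepared to fall back to the raw message, for example:

message = "best trail running shoes 2024"
terms = generate_search_terms(message, "en")
if terms is None:
    terms = message  # hypothetical fallback: search with the user's message verbatim
print(terms)

Checking response.status_code before parsing would be an equally reasonable guard; the except KeyError branch above covers both error payloads and otherwise malformed replies.
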
@@ -91,7 +107,7 @@ async def predict(message, history):
     for link in links:
         full_response += f"{link}\n"
         yield full_response
-    
+
     full_response += "\nExtracting text from web pages...\n"
     yield full_response
     start = time.time()
@@ -118,20 +134,24 @@ async def predict(message, history):
         yield full_response
 
     full_response += "\nResponse: "
-    
+
     url = "https://openrouter.ai/api/v1/chat/completions"
-    headers = { "Content-Type": "application/json",
-                "Authorization": f"Bearer {openrouter_key}" }
-    body = { "stream": True,
-             "models": [
-                 "mistralai/mistral-7b-instruct:free",
-                 "openchat/openchat-7b:free"
-             ],
-             "route": "fallback",
-             "max_tokens": 1024,
-             "messages": [
-                 {"role": "user", "content": prompt}
-             ] }
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {openrouter_key}"
+    }
+    body = {
+        "stream": True,
+        "models": [
+            "mistralai/mistral-7b-instruct:free",
+            "openchat/openchat-7b:free"
+        ],
+        "route": "fallback",
+        "max_tokens": 1024,
+        "messages": [
+            {"role": "user", "content": prompt}
+        ]
+    }
 
     async with aiohttp.ClientSession() as session:
         async with session.post(url, headers=headers, json=body) as response:
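
The hunk above only opens the streaming request; the loop that reads it lies outside this diff. For reference, one way to consume an OpenAI-style server-sent-events stream (which OpenRouter emits when "stream" is True) with aiohttp looks roughly like the sketch below; the "data: " framing and the choices[0]['delta']['content'] field are assumptions about the wire format, not taken from app.py.

import json
import aiohttp

async def stream_completion(url, headers, body):
    # Sketch: yield text deltas from an OpenAI-style SSE stream.
    async with aiohttp.ClientSession() as session:
        async with session.post(url, headers=headers, json=body) as response:
            async for raw_line in response.content:  # aiohttp StreamReader iterates line by line
                line = raw_line.decode("utf-8").strip()
                if not line.startswith("data: "):
                    continue
                payload = line[len("data: "):]
                if payload == "[DONE]":
                    return
                chunk = json.loads(payload)
                delta = chunk["choices"][0]["delta"].get("content")
                if delta:
                    yield delta
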
@@ -169,4 +189,4 @@ gr.ChatInterface(
         'Compare the current economies of China and India?',
         'What are new shoe design trends in 2024',
     ]
-).launch()
+).launch()
 