Spaces:
Sleeping
Sleeping
File size: 4,434 Bytes
c4f995d b3159ec c4f995d b3159ec c4f995d b3159ec c4f995d 9cc5d1c c4f995d b3159ec c4f995d b3159ec c4f995d b3159ec c4f995d b3159ec c4f995d b3159ec c4f995d b3159ec c4f995d b3159ec c4f995d b3159ec c4f995d b3159ec 636ba9a b3159ec c4f995d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 |
import requests
import json
class VectaraQuery():
    """Small client for the Vectara v2 chat endpoints.

    The instance remembers the chat id of the conversation it started
    (``conv_id``) so that later queries are posted as additional turns of the
    same chat instead of opening a new one.
    """

    # Prompt used when the caller does not supply one.
    _DEFAULT_PROMPT_NAME = "vectara-experimental-summary-ext-2023-12-11-sml"
    # User-facing text returned (or yielded) whenever the HTTP call fails.
    _ERROR_MESSAGE = "Sorry, something went wrong in my brain. Please try again later."

    def __init__(self, api_key: str, corpus_keys: list[str], prompt_name: str = None):
        """Store credentials and query configuration.

        Args:
            api_key: Value sent in the ``x-api-key`` request header.
            corpus_keys: Keys of the corpora to search; one search entry is
                built per key.
            prompt_name: Generation prompt name. A falsy value (``None`` or
                ``""``) selects the built-in default prompt.
        """
        self.corpus_keys = corpus_keys
        self.api_key = api_key
        self.prompt_name = prompt_name if prompt_name else self._DEFAULT_PROMPT_NAME
        # Chat id of the ongoing conversation; None until the first reply.
        self.conv_id = None

    def get_body(self, query_str: str, stream: bool = False):
        """Build the JSON-serialisable request body for a chat query.

        Args:
            query_str: The user's query text.
            stream: Value placed in ``stream_response``; pass True when the
                caller will consume the reply as an event stream.

        Returns:
            A dict with the ``query``, ``search``, ``generation``, ``chat``
            and ``stream_response`` sections.
        """
        corpora_list = [
            {'corpus_key': corpus_key, 'lexical_interpolation': 0.005}
            for corpus_key in self.corpus_keys
        ]
        return {
            'query': query_str,
            'search': {
                'corpora': corpora_list,
                'offset': 0,
                'limit': 50,
                'context_configuration': {
                    'sentences_before': 2,
                    'sentences_after': 2,
                    'start_tag': "%START_SNIPPET%",
                    'end_tag': "%END_SNIPPET%",
                },
                'reranker': {
                    'type': 'mmr'
                },
            },
            'generation': {
                'prompt_name': self.prompt_name,
                'max_used_search_results': 10,
                'response_language': 'eng',
                'citations': {
                    'style': 'none'
                }
            },
            'chat': {
                'store': True
            },
            'stream_response': stream
        }

    def get_headers(self):
        """Return the request headers for a regular (non-streaming) JSON call."""
        return {
            "Content-Type": "application/json",
            "Accept": "application/json",
            "x-api-key": self.api_key,
            "grpc-timeout": "60S"
        }

    def get_stream_headers(self):
        """Return the request headers for a streaming call.

        Identical to :meth:`get_headers` except that ``Accept`` asks for
        ``text/event-stream``.
        """
        headers = self.get_headers()
        headers["Accept"] = "text/event-stream"
        return headers

    def _get_endpoint(self) -> str:
        """Return the URL to post to: a new chat, or a new turn of ``conv_id``."""
        if self.conv_id:
            return f"https://api.vectara.io/v2/chats/{self.conv_id}/turns"
        return "https://api.vectara.io/v2/chats"

    def _report_failure(self, response) -> str:
        """Print diagnostics for a failed HTTP response and return the apology text."""
        print(f"Query failed with code {response.status_code}, reason {response.reason}, text {response.text}")
        return self._ERROR_MESSAGE

    def _handle_stream_line(self, raw_line: bytes):
        """Process one raw line of the event stream.

        Records the chat id when a ``chat_info`` event arrives (only if no
        conversation is being tracked yet, mirroring :meth:`submit_query`).

        Args:
            raw_line: One undecoded line as produced by ``iter_lines()``.

        Returns:
            The text of a ``generation_chunk`` event, or None for every other
            line (keep-alives, non-``data`` fields, other event types).
        """
        line = raw_line.decode('utf-8')
        if not line:  # keep-alive blank line
            return None

        # partition() never raises, unlike unpacking split(':', 1) on a line
        # that happens to contain no colon.
        key, _, value = line.partition(':')
        if key != 'data':
            return None

        event = json.loads(value)
        if not isinstance(event, dict):
            return None

        event_type = event.get('type')
        if event_type == 'generation_chunk':
            return event['generation_chunk']
        if event_type == 'chat_info' and self.conv_id is None:
            # Without this the streaming path would never learn the chat id
            # and every streamed query would start a brand-new conversation.
            self.conv_id = event.get('chat_id')
        return None

    def submit_query(self, query_str: str):
        """Send a query and return the complete generated answer.

        On the first successful call the chat id from the reply is stored in
        ``conv_id`` so that subsequent calls continue the same chat.

        Args:
            query_str: The user's query text.

        Returns:
            The ``answer`` field of the reply, or a fixed apology string when
            the HTTP status is not 200.
        """
        endpoint = self._get_endpoint()
        body = self.get_body(query_str, stream=False)
        response = requests.post(endpoint, data=json.dumps(body), verify=True, headers=self.get_headers())
        if response.status_code != 200:
            return self._report_failure(response)

        res = response.json()
        if self.conv_id is None:
            self.conv_id = res['chat_id']

        # TODO: port the API v1 per-chat status handling to API v2. The v1 code
        # checked the chat status and, on 'RESOURCE_EXHAUSTED', reset
        # self.conv_id and returned 'Sorry, Vectara chat turns exceeds plan
        # limit.'; any other non-null status returned the generic apology.
        return res['answer']

    def submit_query_streaming(self, query_str: str):
        """Send a query and yield the generated answer piece by piece.

        This is a generator: the HTTP request is only issued once iteration
        starts. If the request fails, the apology string is yielded as the
        single item so that consumers iterating the stream actually see it.

        Args:
            query_str: The user's query text.

        Yields:
            Each generated text chunk in arrival order, or the apology string
            when the HTTP status is not 200.

        Returns:
            (As the generator's ``StopIteration`` value.) The concatenation of
            all chunks, or the apology string on failure.
        """
        endpoint = self._get_endpoint()
        body = self.get_body(query_str, stream=True)
        response = requests.post(
            endpoint,
            data=json.dumps(body),
            verify=True,
            headers=self.get_stream_headers(),
            stream=True,
        )
        try:
            if response.status_code != 200:
                error_message = self._report_failure(response)
                # A bare `return value` inside a generator is invisible to a
                # caller that just iterates, so surface the message as an item.
                yield error_message
                return error_message

            chunks = []
            for raw_line in response.iter_lines():
                chunk = self._handle_stream_line(raw_line)
                if chunk is not None:
                    chunks.append(chunk)
                    yield chunk
            return ''.join(chunks)
        finally:
            # Release the connection even if the consumer stops iterating early.
            response.close()