Spaces:

vectara
/

starwars-chat

Running

App Files Files Community

github-actions committed on Jul 20

Commit

8a46321

•

1 Parent(s): 9f7a9ec

Sync updates from source repository

Browse files

Files changed (2) hide show

app.py +19 -22
query.py +84 -87

app.py CHANGED Viewed

@@ -7,8 +7,8 @@ from streamlit_pills import pills
 from PIL import Image
-max_examples = 4
 def isTrue(x) -> bool:
     if isinstance(x, bool):
         return x
@@ -22,7 +22,7 @@ def launch_bot():
     def generate_streaming_response(question):
         response = vq.submit_query_streaming(question)
         return response
     def show_example_questions():
         if len(st.session_state.example_messages) > 0 and st.session_state.first_turn:
             selected_example = pills("Queries to Try:", st.session_state.example_messages, index=None)
@@ -31,27 +31,25 @@ def launch_bot():
                 st.session_state.first_turn = False
                 return True
         return False
     if 'cfg' not in st.session_state:
-        corpus_ids = str(os.environ['corpus_ids']).split(',')
         cfg = OmegaConf.create({
-            'customer_id': str(os.environ['customer_id']),
-            'corpus_ids': corpus_ids,
             'api_key': str(os.environ['api_key']),
             'title': os.environ['title'],
-            'description': os.environ['description'],
             'source_data_desc': os.environ['source_data_desc'],
             'streaming': isTrue(os.environ.get('streaming', False)),
             'prompt_name': os.environ.get('prompt_name', None),
-            'examples': os.environ.get('examples', '')
         })
         st.session_state.cfg = cfg
         st.session_state.ex_prompt = None
         st.session_state.first_turn = True
         example_messages = [example.strip() for example in cfg.examples.split(",")]
         st.session_state.example_messages = [em for em in example_messages if len(em)>0][:max_examples]
-        st.session_state.vq = VectaraQuery(cfg.api_key, cfg.customer_id, cfg.corpus_ids, cfg.prompt_name)
     cfg = st.session_state.cfg
     vq = st.session_state.vq
@@ -60,7 +58,8 @@ def launch_bot():
     # left side content
     with st.sidebar:
         image = Image.open('Vectara-logo.png')
-        st.markdown(f"## Welcome to {cfg.title}\n\n"
                     f"This demo uses Retrieval Augmented Generation to ask questions about {cfg.source_data_desc}\n\n")
         st.markdown("---")
@@ -71,25 +70,23 @@ def launch_bot():
             "This app uses Vectara [Chat API](https://docs.vectara.com/docs/console-ui/vectara-chat-overview) to query the corpus and present the results to you, answering your question.\n\n"
         )
         st.markdown("---")
-        st.image(image, width=250)
-    st.markdown(f"<center> <h2> Vectara chat demo: {cfg.title} </h2> </center>", unsafe_allow_html=True)
-    st.markdown(f"<center> <h4> {cfg.description} <h4> </center>", unsafe_allow_html=True)
     if "messages" not in st.session_state.keys():
         st.session_state.messages = [{"role": "assistant", "content": "How may I help you?"}]
     example_container = st.empty()
     with example_container:
         if show_example_questions():
             example_container.empty()
             st.rerun()
-    # Display chat messages
-    for message in st.session_state.messages:
-        with st.chat_message(message["role"]):
-            st.write(message["content"])
     # select prompt from example question or user provided input
     if st.session_state.ex_prompt:
@@ -117,4 +114,4 @@ def launch_bot():
             st.rerun()
 if __name__ == "__main__":
-    launch_bot()

 from PIL import Image
+max_examples = 6
 def isTrue(x) -> bool:
     if isinstance(x, bool):
         return x
     def generate_streaming_response(question):
         response = vq.submit_query_streaming(question)
         return response
     def show_example_questions():
         if len(st.session_state.example_messages) > 0 and st.session_state.first_turn:
             selected_example = pills("Queries to Try:", st.session_state.example_messages, index=None)
                 st.session_state.first_turn = False
                 return True
         return False
     if 'cfg' not in st.session_state:
+        corpus_keys = str(os.environ['corpus_keys']).split(',')
         cfg = OmegaConf.create({
+            'corpus_keys': corpus_keys,
             'api_key': str(os.environ['api_key']),
             'title': os.environ['title'],
             'source_data_desc': os.environ['source_data_desc'],
             'streaming': isTrue(os.environ.get('streaming', False)),
             'prompt_name': os.environ.get('prompt_name', None),
+            'examples': os.environ.get('examples', None)
         })
         st.session_state.cfg = cfg
         st.session_state.ex_prompt = None
         st.session_state.first_turn = True
         example_messages = [example.strip() for example in cfg.examples.split(",")]
         st.session_state.example_messages = [em for em in example_messages if len(em)>0][:max_examples]
+        st.session_state.vq = VectaraQuery(cfg.api_key, cfg.corpus_keys, cfg.prompt_name)
     cfg = st.session_state.cfg
     vq = st.session_state.vq
     # left side content
     with st.sidebar:
         image = Image.open('Vectara-logo.png')
+        st.image(image, width=175)
+        st.markdown(f"## About\n\n"
                     f"This demo uses Retrieval Augmented Generation to ask questions about {cfg.source_data_desc}\n\n")
         st.markdown("---")
             "This app uses Vectara [Chat API](https://docs.vectara.com/docs/console-ui/vectara-chat-overview) to query the corpus and present the results to you, answering your question.\n\n"
         )
         st.markdown("---")
+    st.markdown(f"<center> <h2> Vectara AI Assistant: {cfg.title} </h2> </center>", unsafe_allow_html=True)
     if "messages" not in st.session_state.keys():
         st.session_state.messages = [{"role": "assistant", "content": "How may I help you?"}]
+    # Display chat messages
+    for message in st.session_state.messages:
+        with st.chat_message(message["role"]):
+            st.write(message["content"])
     example_container = st.empty()
     with example_container:
         if show_example_questions():
             example_container.empty()
             st.rerun()
     # select prompt from example question or user provided input
     if st.session_state.ex_prompt:
             st.rerun()
 if __name__ == "__main__":
+    launch_bot()

query.py CHANGED Viewed

@@ -3,52 +3,54 @@ import json
 class VectaraQuery():
-    def __init__(self, api_key: str, customer_id: str, corpus_ids: list[str], prompt_name: str = None):
-        self.customer_id = customer_id
-        self.corpus_ids = corpus_ids
         self.api_key = api_key
-        self.prompt_name = prompt_name if prompt_name else "vectara-experimental-summary-ext-2023-12-11-sml"
         self.conv_id = None
-    def get_body(self, query_str: str):
-        corpora_key_list = [{
-                'customer_id': self.customer_id, 'corpus_id': corpus_id, 'lexical_interpolation_config': {'lambda': 0.005}
-            } for corpus_id in self.corpus_ids
         ]
         return {
-            'query': [
-                {
-                    'query': query_str,
-                    'start': 0,
-                    'numResults': 50,
-                    'corpusKey': corpora_key_list,
-                    'context_config': {
-                        'sentences_before': 2,
-                        'sentences_after': 2,
-                        'start_tag': "%START_SNIPPET%",
-                        'end_tag': "%END_SNIPPET%",
-                    },
-                    'rerankingConfig':
-                    {
-                        'rerankerId': 272725719,
-                    },
-                    'summary': [
-                        {
-                            'responseLang': 'eng',
-                            'maxSummarizedResults': 10,
-                            'summarizerPromptName': self.prompt_name,
-                            'chat': {
-                                'store': True,
-                                'conversationId': self.conv_id
-                            },
-                            'citationParams': {
-                                "style": "NONE",
-                            }
-                        }
-                    ]
-                }
-            ]
         }
@@ -56,76 +58,71 @@ class VectaraQuery():
         return {
             "Content-Type": "application/json",
             "Accept": "application/json",
-            "customer-id": self.customer_id,
             "x-api-key": self.api_key,
             "grpc-timeout": "60S"
         }
     def submit_query(self, query_str: str):
-        endpoint = f"https://api.vectara.io/v1/query"
-        body = self.get_body(query_str)
-        response = requests.post(endpoint, data=json.dumps(body), verify=True, headers=self.get_headers())
         if response.status_code != 200:
             print(f"Query failed with code {response.status_code}, reason {response.reason}, text {response.text}")
             return "Sorry, something went wrong in my brain. Please try again later."
         res = response.json()
-        summary = res['responseSet'][0]['summary'][0]['text']
-        chat = res['responseSet'][0]['summary'][0].get('chat', None)
-        if chat and chat['status'] is not None:
-            st_code = chat['status']
-            print(f"Chat query failed with code {st_code}")
-            if st_code == 'RESOURCE_EXHAUSTED':
-                self.conv_id = None
-                return 'Sorry, Vectara chat turns exceeds plan limit.'
-            return 'Sorry, something went wrong in my brain. Please try again later.'
-        self.conv_id = chat['conversationId'] if chat else None
         return summary
     def submit_query_streaming(self, query_str: str):
-        endpoint = "https://api.vectara.io/v1/stream-query"
-        body = self.get_body(query_str)
-        response = requests.post(endpoint, data=json.dumps(body), verify=True, headers=self.get_headers(), stream=True)
         if response.status_code != 200:
             print(f"Query failed with code {response.status_code}, reason {response.reason}, text {response.text}")
-            return "Sorry, something went wrong in my brain. Please try again later."
         chunks = []
         for line in response.iter_lines():
             if line:  # filter out keep-alive new lines
-                data = json.loads(line.decode('utf-8'))
-                res = data['result']
-                response_set = res['responseSet']
-                if response_set is None:
-                    # grab next chunk and yield it as output
-                    summary = res.get('summary', None)
-                    if summary is None or len(summary)==0:
-                        continue
-                    else:
-                        chat = summary.get('chat', None)
-                        if chat and chat.get('status', None):
-                            st_code = chat['status']
-                            print(f"Chat query failed with code {st_code}")
-                            if st_code == 'RESOURCE_EXHAUSTED':
-                                self.conv_id = None
-                                return 'Sorry, Vectara chat turns exceeds plan limit.'
-                            return 'Sorry, something went wrong in my brain. Please try again later.'
-                        conv_id = chat.get('conversationId', None) if chat else None
-                        if conv_id:
-                            self.conv_id = conv_id
-                    chunk = summary['text']
-                    chunks.append(chunk)
-                    yield chunk
-                    if summary['done']:
-                        break
-        return ''.join(chunks)

 class VectaraQuery():
+    def __init__(self, api_key: str, corpus_keys: list[str], prompt_name: str = None):
+        self.corpus_keys = corpus_keys
         self.api_key = api_key
+        self.prompt_name = prompt_name if prompt_name else "vectara-summary-ext-24-05-sml"
         self.conv_id = None
+    def get_body(self, query_str: str, stream: False):
+        corpora_list = [{
+                'corpus_key': corpus_key, 'lexical_interpolation': 0.005
+            } for corpus_key in self.corpus_keys
         ]
         return {
+            'query': query_str,
+            'search':
+            {
+                'corpora': corpora_list,
+                'offset': 0,
+                'limit': 50,
+                'context_configuration':
+                {
+                    'sentences_before': 2,
+                    'sentences_after': 2,
+                    'start_tag': "%START_SNIPPET%",
+                    'end_tag': "%END_SNIPPET%",
+                },
+                'reranker':
+                {
+                    'type': 'customer_reranker',
+		        'reranker_id': 'rnk_272725719'
+                },
+            },
+            'generation':
+            {
+                'prompt_name': self.prompt_name,
+                'max_used_search_results': 10,
+                'response_language': 'eng',
+                'citations':
+                {
+                    'style': 'none'
+                }
+            },
+            'chat':
+            {
+                'store': True
+            },
+            'stream_response': stream
         }
         return {
             "Content-Type": "application/json",
             "Accept": "application/json",
+            "x-api-key": self.api_key,
+            "grpc-timeout": "60S"
+        }
+    def get_stream_headers(self):
+        return {
+            "Content-Type": "application/json",
+            "Accept": "text/event-stream",
             "x-api-key": self.api_key,
             "grpc-timeout": "60S"
         }
     def submit_query(self, query_str: str):
         """Send one non-streaming chat turn and return the answer text.

         Posts to the turns endpoint of the current chat when ``self.conv_id``
         is set, otherwise to the chats endpoint. When no chat id was stored
         beforehand, the ``chat_id`` from the response is saved to
         ``self.conv_id``.

         Args:
             query_str: The user's question.

         Returns:
             The ``answer`` field of the JSON response, or an apology string
             when the HTTP status is not 200.
         """
         endpoint = (
             f"https://api.vectara.io/v2/chats/{self.conv_id}/turns"
             if self.conv_id
             else "https://api.vectara.io/v2/chats"
         )
         payload = json.dumps(self.get_body(query_str, stream=False))
         response = requests.post(endpoint, data=payload, verify=True, headers=self.get_headers())

         if response.status_code == 200:
             res = response.json()
             if self.conv_id is None:
                 self.conv_id = res['chat_id']
             return res['answer']

         # Any non-200 status: log the details, then answer with an apology.
         print(f"Query failed with code {response.status_code}, reason {response.reason}, text {response.text}")
         if response.status_code == 429:
             return "Sorry, Vectara chat turns exceeds plan limit."
         return "Sorry, something went wrong in my brain. Please try again later."
     def submit_query_streaming(self, query_str: str):
         """Send one streaming chat turn and yield the answer chunk by chunk.

         This is a generator. It posts to the turns endpoint of the current
         chat when ``self.conv_id`` is set, otherwise to the chats endpoint,
         and then reads the response line by line.

         Args:
             query_str: The user's question.

         Yields:
             The ``generation_chunk`` text of each ``data:`` line whose JSON
             ``type`` is ``generation_chunk``. When the HTTP status is not
             200, a single apology message is yielded instead.

         Returns:
             The concatenation of all yielded chunks, or the apology message
             on failure. Callers only see this via ``StopIteration.value``.
         """
         if self.conv_id:
             endpoint = f"https://api.vectara.io/v2/chats/{self.conv_id}/turns"
         else:
             endpoint = "https://api.vectara.io/v2/chats"
         body = self.get_body(query_str, stream=True)
         response = requests.post(endpoint, data=json.dumps(body), verify=True, headers=self.get_stream_headers(), stream=True)
         if response.status_code != 200:
             print(f"Query failed with code {response.status_code}, reason {response.reason}, text {response.text}")
             if response.status_code == 429:
                 message = "Sorry, Vectara chat turns exceeds plan limit."
             else:
                 message = "Sorry, something went wrong in my brain. Please try again later."
             # Inside a generator, `return message` alone only sets
             # StopIteration.value and the consumer receives nothing, so the
             # message must be yielded. It is still returned as before.
             yield message
             return message

         chunks = []
         for raw_line in response.iter_lines():
             if not raw_line:  # filter out keep-alive new lines
                 continue
             # partition() never raises, unlike unpacking split(':', 1) on a
             # line that contains no colon.
             key, sep, value = raw_line.decode('utf-8').partition(':')
             if not sep or key != 'data':
                 continue
             event = json.loads(value)
             event_type = event.get('type')
             if event_type == 'generation_chunk':
                 chunk = event['generation_chunk']
                 chunks.append(chunk)
                 yield chunk
             elif event_type == 'chat_info':
                 # NOTE(review): assumes the v2 stream announces the chat id in
                 # a 'chat_info' event with a 'chat_id' field; confirm against
                 # the Vectara API docs. Without it conv_id is never set on the
                 # streaming path and every turn opens a new chat.
                 chat_id = event.get('chat_id')
                 if chat_id and self.conv_id is None:
                     self.conv_id = chat_id
         return ''.join(chunks)