abdullahmeda committed on
Commit ad7df47
1 Parent(s): f0c73b9

added streaming functionality

Files changed (1)
  1. app.py  +86 -15
app.py CHANGED
@@ -1,21 +1,68 @@
 import gradio as gr
 
+from threading import Thread
+from queue import Queue, Empty
+# from callbacks import StreamingGradioCallbackHandler, job_done
+
+from langchain.schema import SystemMessage
 from langchain.chat_models import ChatOpenAI
 from langchain.chains import ConversationChain
+from langchain.prompts import ChatPromptTemplate
 from langchain.memory import ConversationBufferMemory
+from langchain.callbacks.base import BaseCallbackHandler
+
+# huggingface.co/spaces/huggingface-projects/llama-2-13b-chat
+DEFAULT_SYSTEM_PROMPT = """\
+You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. \
+Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please \
+ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or \
+is not factually coherent, explain why instead of answering something not correct. If you don't know the answer \
+to a question, please don't share false information."""
+
+class QueueCallback(BaseCallbackHandler):
+    """Callback handler for streaming LLM responses to a queue."""
+
+    def __init__(self, q):
+        self.q = q
+
+    def on_llm_new_token(self, token: str, **kwargs) -> None:
+        print(token)
+        self.q.put(token)
+
+    def on_llm_end(self, *args, **kwargs) -> None:
+        print("Done")
+        return self.q.empty()
 
-def respond(openai_api_key, openai_model, message, buffer_memory, chat_history):
+def respond(openai_api_key, openai_model, creativity, max_tokens, message, buffer_memory, chat_history):
+    # print(buffer_memory.buffer)
+    chat_history.append([message, None])
+    q = Queue()
+    job_done = object()
+    callback = QueueCallback(q)
     conversation = ConversationChain(
         llm = ChatOpenAI(
-            temperature=0.0,
-            model=openai_model,
-            openai_api_key=openai_api_key
+            model=openai_model,
+            max_tokens=max_tokens,
+            temperature=creativity,
+            openai_api_key=openai_api_key,
+            streaming=True,
+            callbacks=[callback]
         ),
         memory = buffer_memory
     )
-    response = conversation.predict(input=message)
-    chat_history.append([message, response])
-    return "", buffer_memory, chat_history
+    def task():
+        resp = conversation.predict(input=message)
+        q.put(job_done)
+    thread = Thread(target=task)
+    thread.start()
+    chat_history[-1] = (chat_history[-1][0], "")
+    while True:
+        next_token = q.get(block=True)  # Blocks until an input is available
+        if next_token is job_done:
+            break
+        chat_history[-1] = (chat_history[-1][0], chat_history[-1][1] + next_token)
+        yield "", buffer_memory, chat_history  # Yield the chatbot's response as a string
+    thread.join()
 
 
 with gr.Blocks(css="#component-0 { max-width: 900px; margin: auto; padding-top: 1.5rem; }") as demo:
@@ -24,12 +71,12 @@ with gr.Blocks(css="#component-0 { max-width: 900px; margin: auto; padding-top:
     openai_key = gr.Textbox(
         label="OpenAI Key",
         type="password",
-        placeholder="sk-a83jv6fn3x8ndm78b5W..."
+        placeholder="sk-a83jv6fn3x8ndm78b5W...",
     )
     model = gr.Dropdown(
-        ["gpt-4", "gpt-4-32k",
+        ["gpt-4",
         "gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-3.5-turbo-instruct",
         "text-davinci-002", "text-davinci-003"],
         label="OpenAI Model",
         value="gpt-3.5-turbo",
         interactive=True
@@ -53,9 +100,33 @@ with gr.Blocks(css="#component-0 { max-width: 900px; margin: auto; padding-top:
         scale=1,
         min_width=0)
 
+    with gr.Accordion(label='Advanced options', open=False):
+        system_prompt = gr.Textbox(label='System prompt', value=DEFAULT_SYSTEM_PROMPT, lines=6)
+        max_new_tokens = gr.Slider(
+            label='Max new tokens',
+            minimum=1,
+            maximum=4096,
+            step=1,
+            value=2048,
+        )
+        temperature = gr.Slider(
+            label='Temperature',
+            minimum=0.0,
+            maximum=1.0,
+            step=0.1,
+            value=0.0,
+        )
+        memory_window = gr.Slider(
+            label='Conversation Memory Window',
+            minimum=-1,
+            maximum=10,
+            step=1,
+            value=-1,
+            interactive=True
+        )
+
     # Event Handling
-    query.submit(respond, [openai_key, model, query, memory, chatbot], [query, memory, chatbot])
-    submit.click(respond, [openai_key, model, query, memory, chatbot], [query, memory, chatbot])
+    query.submit(respond, [openai_key, model, temperature, max_new_tokens, query, memory, chatbot], [query, memory, chatbot])
+    submit.click(respond, [openai_key, model, temperature, max_new_tokens, query, memory, chatbot], [query, memory, chatbot])
 
-if __name__ == "__main__":
-    demo.launch()
+demo.queue().launch()
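
The streaming change above follows a common producer/consumer pattern: a LangChain callback pushes each generated token onto a `Queue` from a worker thread, while the Gradio handler drains the queue and yields a progressively longer chat history. Below is a minimal, self-contained sketch of that pattern; the fake `produce_tokens` worker and the `stream_reply` name are illustrative stand-ins for `ChatOpenAI`'s streaming callback and the commit's `respond`, not part of the commit itself.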
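from queue import Queue
from threading import Thread
import time

def stream_reply(message: str):
    q: Queue = Queue()
    job_done = object()  # sentinel object marking that the producer is finished

    def produce_tokens():
        # Stand-in for conversation.predict(...) with a QueueCallback:
        # emit tokens one by one, then signal completion with the sentinel.
        for token in ["Hello", ", ", "world", "!"]:
            time.sleep(0.1)  # simulate generation latency
            q.put(token)
        q.put(job_done)

    thread = Thread(target=produce_tokens)
    thread.start()

    partial = ""
    while True:
        token = q.get(block=True)  # blocks until the producer emits something
        if token is job_done:
            break
        partial += token
        yield partial  # each yield would be one UI update in Gradio
    thread.join()

if __name__ == "__main__":
    for partial in stream_reply("hi"):
        print(partial)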
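Because `respond` is now a generator (it yields partial chat history instead of returning once), the app has to run with Gradio's queue enabled, which is why the final line changes from a plain `demo.launch()` to `demo.queue().launch()`: in the Gradio 3.x releases current at the time of this commit, yielding from an event handler without enabling the queue raises an error.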
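
One loose end in the diff: the new "Conversation Memory Window" slider (`memory_window`) is added to the UI but never passed to `respond`, which still builds the chain from the unbounded `ConversationBufferMemory` held in state. A sketch of how the slider value could select a memory implementation, using LangChain's `ConversationBufferWindowMemory`; the `make_memory` helper is hypothetical, not part of the commit.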
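# Hypothetical wiring for the "Conversation Memory Window" slider.
# ConversationBufferWindowMemory keeps only the last k exchanges, so the
# slider value could cap memory growth; -1 keeps the full history.
from langchain.memory import ConversationBufferMemory, ConversationBufferWindowMemory

def make_memory(memory_window: int):
    if memory_window < 0:
        # slider value -1: retain the entire conversation
        return ConversationBufferMemory()
    return ConversationBufferWindowMemory(k=memory_window)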