abdullahmeda committed
Commit: 806f9a8
1 Parent(s): ad7df47
added illustration

Files changed:
- app.py +52 -55
- banner.png +0 -0

app.py
CHANGED
@@ -1,15 +1,12 @@
 import gradio as gr
 
+from queue import Queue
 from threading import Thread
-from queue import Queue
-# from callbacks import StreamingGradioCallbackHandler, job_done
+from callbacks import StreamingGradioCallbackHandler, job_done
 
-from langchain.schema import SystemMessage
 from langchain.chat_models import ChatOpenAI
 from langchain.chains import ConversationChain
-from langchain.prompts import ChatPromptTemplate
 from langchain.memory import ConversationBufferMemory
-from langchain.callbacks.base import BaseCallbackHandler
 
 # huggingface.co/spaces/huggingface-projects/llama-2-13b-chat
 DEFAULT_SYSTEM_PROMPT = """\
@@ -19,74 +16,66 @@ ensure that your responses are socially unbiased and positive in nature.\n\nIf a
 is not factually coherent, explain why instead of answering something not correct. If you don't know the answer \
 to a question, please don't share false information."""
 
-class QueueCallback(BaseCallbackHandler):
-    """Callback handler for streaming LLM responses to a queue."""
-
-    def __init__(self, q):
-        self.q = q
-
-    def on_llm_new_token(self, token: str, **kwargs) -> None:
-        print(token)
-        self.q.put(token)
-
-    def on_llm_end(self, *args, **kwargs) -> None:
-        print("Done")
-        return self.q.empty()
-
 def respond(openai_api_key, openai_model, creativity, max_tokens, message, buffer_memory, chat_history):
-    # print(buffer_memory.buffer)
-    chat_history.append([message, None])
     q = Queue()
-    job_done = object()
-    callback = QueueCallback(q)
     conversation = ConversationChain(
         llm = ChatOpenAI(
+            streaming=True,
            model=openai_model,
            max_tokens=max_tokens,
            temperature=creativity,
            openai_api_key=openai_api_key,
-
-            callbacks=[callback]
+            callbacks=[StreamingGradioCallbackHandler(q)]
        ),
        memory = buffer_memory
    )
-
-
-
-
+    chat_history.append([message, ""])
+
+    thread = Thread(target=conversation.predict, kwargs={
+        "input": message,
+    })
     thread.start()
-
+
     while True:
         next_token = q.get(block=True) # Blocks until an input is available
         if next_token is job_done:
             break
         chat_history[-1] = (chat_history[-1][0], chat_history[-1][1] + next_token)
-        yield "", buffer_memory, chat_history # Yield the chatbot's response
+        yield "", buffer_memory, chat_history # Yield the chatbot's response
     thread.join()
 
 
+def init_buffer_memory():
+    memory = ConversationBufferMemory()
+    memory.save_context({"input": DEFAULT_SYSTEM_PROMPT}, {"output": "Of course!"})
+    return memory
+
+
 with gr.Blocks(css="#component-0 { max-width: 900px; margin: auto; padding-top: 1.5rem; }") as demo:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    gr.Markdown(
+        """
+        ![](/home/abdulla/gradio/banner.png)
+        """
+    )
+
+    with gr.Group():
+        with gr.Row(visible=True) as primary_settings:
+            openai_key = gr.Textbox(
+                container=False,
+                type="password",
+                placeholder="OpenAI Key: sk-a83jv6fn3x8ndm78b5W...",
+            )
+            model = gr.Dropdown(
+                ["gpt-4",
+                 "gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-3.5-turbo-instruct",
+                 "text-davinci-002", "text-davinci-003"],
+                container=False,
+                value="gpt-3.5-turbo",
+                interactive=True
+            )
+
     with gr.Group() as chat:
-        memory = gr.State(
+        memory = gr.State(init_buffer_memory())
         chatbot = gr.Chatbot(label='Chatbot')
         with gr.Row():
             query = gr.Textbox(
@@ -95,10 +84,16 @@ with gr.Blocks(css="#component-0 { max-width: 900px; margin: auto; padding-top:
                placeholder='Type a message...',
                scale=10,
            )
-            submit = gr.Button(
-
-
-
+            submit = gr.Button(
+                'Submit',
+                variant='primary',
+                scale=1,
+                min_width=0
+            )
+
+        with gr.Row():
+            regenerate = gr.Button("Regenerate")
+            clear_history = gr.Button("Clear History")
 
     with gr.Accordion(label='Advanced options', open=False):
         system_prompt = gr.Textbox(label='System prompt', value=DEFAULT_SYSTEM_PROMPT, lines=6)
@@ -128,5 +123,7 @@ with gr.Blocks(css="#component-0 { max-width: 900px; margin: auto; padding-top:
     # Event Handling
     query.submit(respond, [openai_key, model, temperature, max_new_tokens, query, memory, chatbot], [query, memory, chatbot])
     submit.click(respond, [openai_key, model, temperature, max_new_tokens, query, memory, chatbot], [query, memory, chatbot])
+
+    regenerate.click()
 
 demo.queue().launch()
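Note on the new callbacks import: the callbacks.py module providing StreamingGradioCallbackHandler and job_done is not included in this commit. Below is a minimal sketch of what it is assumed to contain, modeled on the QueueCallback class removed above; the put(job_done) call in on_llm_end is an inference from the "if next_token is job_done: break" check in respond(), not something the commit shows.

# callbacks.py -- assumed contents, not part of this commit
from queue import Queue

from langchain.callbacks.base import BaseCallbackHandler

# Sentinel placed on the queue when generation finishes (name taken from the import in app.py).
job_done = object()


class StreamingGradioCallbackHandler(BaseCallbackHandler):
    """Streams LLM tokens into a queue that the Gradio generator drains."""

    def __init__(self, q: Queue):
        self.q = q

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        # Fires once per token because ChatOpenAI is created with streaming=True.
        self.q.put(token)

    def on_llm_end(self, *args, **kwargs) -> None:
        # Signal the consumer loop in respond() that the reply is complete.
        self.q.put(job_done)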
banner.png
ADDED
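For reference, the streaming pattern respond() now relies on (a worker thread produces tokens into a Queue, a generator drains it until a sentinel and yields the growing reply) can be exercised without LangChain or an OpenAI key. The sketch below is illustrative only; fake_llm and its token list are made up and stand in for conversation.predict plus the streaming callback.

# Standalone illustration of the producer/consumer streaming pattern (not part of the commit).
from queue import Queue
from threading import Thread

job_done = object()  # sentinel marking the end of generation


def fake_llm(q: Queue) -> None:
    # Stand-in for conversation.predict(): pushes tokens, then the sentinel.
    for token in ["Hello", ", ", "world", "!"]:
        q.put(token)
    q.put(job_done)


def stream_reply():
    q: Queue = Queue()
    Thread(target=fake_llm, args=(q,)).start()
    reply = ""
    while True:
        token = q.get(block=True)  # blocks until the worker produces something
        if token is job_done:
            break
        reply += token
        yield reply  # each yield is a partial reply, like the chatbot update in app.py


if __name__ == "__main__":
    for partial in stream_reply():
        print(partial)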