import gradio as gr
from queue import Queue
from threading import Thread

# Local module providing the streaming callback handler and the end-of-stream
# sentinel; a sketch of it is included at the end of this file.
from callbacks import StreamingGradioCallbackHandler, job_done

from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory

# huggingface.co/spaces/huggingface-projects/llama-2-13b-chat
DEFAULT_SYSTEM_PROMPT = """\
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. \
Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please \
ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or \
is not factually coherent, explain why instead of answering something not correct. If you don't know the answer \
to a question, please don't share false information."""


def respond(openai_api_key, openai_model, creativity, max_tokens, message, buffer_memory, chat_history):
    """Stream the model's reply token by token into the chat history."""
    q = Queue()
    conversation = ConversationChain(
        llm=ChatOpenAI(
            streaming=True,
            model=openai_model,
            max_tokens=max_tokens,
            temperature=creativity,
            openai_api_key=openai_api_key,
            callbacks=[StreamingGradioCallbackHandler(q)],
        ),
        memory=buffer_memory,
    )
    chat_history.append([message, ""])

    # Run the chain in a background thread so tokens can be consumed from the
    # queue as they arrive instead of waiting for the full completion.
    thread = Thread(target=conversation.predict, kwargs={"input": message})
    thread.start()

    while True:
        next_token = q.get(block=True)  # blocks until a token is available
        if next_token is job_done:
            break
        chat_history[-1][1] += next_token
        yield "", buffer_memory, chat_history  # clear the textbox, update the chat
    thread.join()


def init_buffer_memory():
    """Create a conversation memory pre-seeded with the system prompt."""
    memory = ConversationBufferMemory()
    memory.save_context({"input": DEFAULT_SYSTEM_PROMPT}, {"output": "Of course!"})
    return memory


def clear_chat():
    """Assumed behavior for the Clear History button: fresh memory, empty chat."""
    return init_buffer_memory(), []


def regenerate_response(openai_api_key, openai_model, creativity, max_tokens, buffer_memory, chat_history):
    """Assumed behavior for the Regenerate button: re-answer the last message.

    Drops the most recent exchange from both the visible history and the chain
    memory (ConversationBufferMemory keeps human/AI message pairs in
    `chat_memory.messages`), then re-runs `respond` with the same message.
    """
    if not chat_history:
        yield "", buffer_memory, chat_history
        return
    last_message = chat_history.pop()[0]
    buffer_memory.chat_memory.messages = buffer_memory.chat_memory.messages[:-2]
    yield from respond(openai_api_key, openai_model, creativity, max_tokens,
                       last_message, buffer_memory, chat_history)


with gr.Blocks(css="#component-0 { max-width: 900px; margin: auto; padding-top: 1.5rem; }") as demo:
    gr.Markdown(
        """
        ![](/home/abdulla/gradio/banner.png)
        """
    )

    with gr.Group():
        with gr.Row(visible=True) as primary_settings:
            openai_key = gr.Textbox(
                container=False,
                type="password",
                placeholder="OpenAI Key: sk-a83jv6fn3x8ndm78b5W...",
            )
            model = gr.Dropdown(
                ["gpt-4", "gpt-3.5-turbo", "gpt-3.5-turbo-16k",
                 "gpt-3.5-turbo-instruct", "text-davinci-002", "text-davinci-003"],
                container=False,
                value="gpt-3.5-turbo",
                interactive=True,
            )

    with gr.Group() as chat:
        memory = gr.State(init_buffer_memory())
        chatbot = gr.Chatbot(label='Chatbot')
        with gr.Row():
            query = gr.Textbox(
                container=False,
                show_label=False,
                placeholder='Type a message...',
                scale=10,
            )
            submit = gr.Button('Submit', variant='primary', scale=1, min_width=0)

    with gr.Row():
        regenerate = gr.Button("Regenerate")
        clear_history = gr.Button("Clear History")

    # Note: `system_prompt` and `memory_window` are displayed but not yet
    # wired into the chain above.
    with gr.Accordion(label='Advanced options', open=False):
        system_prompt = gr.Textbox(label='System prompt', value=DEFAULT_SYSTEM_PROMPT, lines=6)
        max_new_tokens = gr.Slider(
            label='Max new tokens',
            minimum=1,
            maximum=4096,
            step=1,
            value=2048,
        )
        temperature = gr.Slider(
            label='Temperature',
            minimum=0.0,
            maximum=1.0,
            step=0.1,
            value=0.0,
        )
        memory_window = gr.Slider(
            label='Conversation Memory Window',
            minimum=-1,
            maximum=10,
            step=1,
            value=-1,
            interactive=True,
        )

    # Event handling
    query.submit(respond,
                 [openai_key, model, temperature, max_new_tokens, query, memory, chatbot],
                 [query, memory, chatbot])
    submit.click(respond,
                 [openai_key, model, temperature, max_new_tokens, query, memory, chatbot],
                 [query, memory, chatbot])
    regenerate.click(regenerate_response,
                     [openai_key, model, temperature, max_new_tokens, memory, chatbot],
                     [query, memory, chatbot])
    clear_history.click(clear_chat, None, [memory, chatbot])

demo.queue().launch()
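# ---------------------------------------------------------------------------
# The local `callbacks` module imported at the top is not shown here. Below is
# a minimal sketch of what it could look like, assuming the standard LangChain
# BaseCallbackHandler hooks: `on_llm_new_token` fires once per streamed token
# and `on_llm_end` fires when generation completes. `job_done` is simply a
# sentinel object that the consumer loop in `respond` watches for.
#
#     # callbacks.py
#     from queue import Queue
#     from typing import Any
#
#     from langchain.callbacks.base import BaseCallbackHandler
#
#     job_done = object()  # sentinel: generation has finished
#
#     class StreamingGradioCallbackHandler(BaseCallbackHandler):
#         def __init__(self, q: Queue):
#             self.q = q
#
#         def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
#             self.q.put(token)  # forward each new token to the UI thread
#
#         def on_llm_end(self, response: Any, **kwargs: Any) -> None:
#             self.q.put(job_done)  # signal end of stream
# ---------------------------------------------------------------------------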