import gradio as gr
from queue import Queue
from threading import Thread

# Local module providing the streaming callback handler and the `job_done`
# sentinel; see the sketch at the bottom of this file.
from callbacks import StreamingGradioCallbackHandler, job_done

from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory

# System prompt adapted from:
# huggingface.co/spaces/huggingface-projects/llama-2-13b-chat
DEFAULT_SYSTEM_PROMPT = """\
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. \
Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please \
ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or \
is not factually coherent, explain why instead of answering something not correct. If you don't know the answer \
to a question, please don't share false information."""


def respond(openai_api_key, openai_model, creativity, max_tokens, message, buffer_memory, chat_history):
    if openai_api_key == "":
        gr.Warning('Please enter a valid OpenAI API key!')
        # This function is a generator, so the early exit must yield its
        # outputs; a plain `return value` in a generator is silently dropped.
        yield message, buffer_memory, chat_history
        return

    # Tokens are streamed from the LLM thread to this generator via a queue.
    q = Queue()
    conversation = ConversationChain(
        llm=ChatOpenAI(
            streaming=True,
            model=openai_model,
            max_tokens=max_tokens,
            temperature=creativity,
            openai_api_key=openai_api_key,
            callbacks=[StreamingGradioCallbackHandler(q)]
        ),
        memory=buffer_memory
    )

    # Append an empty bot reply that gets filled in token by token below.
    chat_history.append([message, ""])
    thread = Thread(target=conversation.predict, kwargs={
        "input": message,
    })
    thread.start()

    while True:
        next_token = q.get(block=True)  # Blocks until a token is available
        if next_token is job_done:
            break
        chat_history[-1] = (chat_history[-1][0], chat_history[-1][1] + next_token)
        yield "", buffer_memory, chat_history  # Stream the partial response
    thread.join()


def init_buffer_memory():
    # Seed the conversation memory with the system prompt so that every
    # subsequent exchange carries it as context.
    memory = ConversationBufferMemory()
    memory.save_context({"input": DEFAULT_SYSTEM_PROMPT}, {"output": "Of course!"})
    return memory


def clear():
    return init_buffer_memory(), []


def regenerate_response():
    # TODO: Add functionality
    pass


complete_memory = ConversationBufferMemory()
complete_memory.save_context({"input": DEFAULT_SYSTEM_PROMPT}, {"output": "Of course!"})

with gr.Blocks(css="""
    #component-0 { max-width: 900px; margin: auto; padding-top: 1.5rem; }
    #duplicate-button { margin: auto; color: white; background: #1565c0; border-radius: 100vh; }
""") as demo:
    gr.Markdown(
        """
        ![](https://huggingface.co/spaces/abdullahmeda/OpenChat/resolve/main/banner.png)

        This Hugging Face Gradio Space gives you access to all OpenAI API models
        that are readily available to the public, with custom system messages.
        Please note that you will need an OpenAI API key to use this Space.
        """
    )
    gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")

    with gr.Group():
        with gr.Row(visible=True) as primary_settings:
            openai_key = gr.Textbox(
                container=False,
                type="password",
                placeholder="OpenAI Key: sk-a83jv6fn3x8ndm78b5W...",
            )
            # Note: ChatOpenAI only supports chat models; the completion-style
            # models at the end of this list (gpt-3.5-turbo-instruct,
            # text-davinci-002/003) would need a completion LLM wrapper instead.
            model = gr.Dropdown(
                ["gpt-4", "gpt-4-1106-preview",
                 "gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-3.5-turbo-instruct",
                 "text-davinci-002", "text-davinci-003"],
                container=False,
                value="gpt-3.5-turbo",
                interactive=True
            )

    with gr.Group() as chat:
        memory = gr.State(complete_memory)
        chatbot = gr.Chatbot(label='Chatbot')
        with gr.Row():
            query = gr.Textbox(
                container=False,
                show_label=False,
                placeholder='Type a message...',
                scale=10,
            )
            submit = gr.Button(
                'Submit',
                variant='primary',
                scale=1,
                min_width=0
            )

    with gr.Row():
        regenerate = gr.Button("Regenerate")
        clear_history = gr.Button("Clear History")

    with gr.Accordion(label='Advanced options', open=False):
        # Note: this textbox is not yet wired into the event handlers; the
        # memory is always initialised with DEFAULT_SYSTEM_PROMPT.
        system_prompt = gr.Textbox(label='System prompt', value=DEFAULT_SYSTEM_PROMPT, lines=6)
        max_new_tokens = gr.Slider(
            label='Max new tokens',
            minimum=1,
            maximum=4096,
            step=1,
            value=4096,
        )
        temperature = gr.Slider(
            label='Temperature',
            minimum=0.0,
            maximum=1.0,
            step=0.1,
            value=0.0,
        )
        # memory_window = gr.Slider(
        #     label='Conversation Memory Window',
        #     minimum=-1,
        #     maximum=10,
        #     step=1,
        #     value=-1,
        #     interactive=True
        # )

    # Event Handling
    query.submit(respond,
                 [openai_key, model, temperature, max_new_tokens, query, memory, chatbot],
                 [query, memory, chatbot])
    submit.click(respond,
                 [openai_key, model, temperature, max_new_tokens, query, memory, chatbot],
                 [query, memory, chatbot])

    regenerate.click(regenerate_response, None, None)
    clear_history.click(clear, None, [memory, chatbot])

demo.queue().launch()