import gradio as gr
from queue import Queue
from threading import Thread

# Local module providing the streaming callback handler and the end-of-stream
# sentinel; a sketch of it is included at the end of this file.
from callbacks import StreamingGradioCallbackHandler, job_done

from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory

# huggingface.co/spaces/huggingface-projects/llama-2-13b-chat
DEFAULT_SYSTEM_PROMPT = """\
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. \
Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please \
ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or \
is not factually coherent, explain why instead of answering something not correct. If you don't know the answer \
to a question, please don't share false information."""


def respond(openai_api_key, openai_model, creativity, max_tokens, message, buffer_memory, chat_history):
    """Stream the model's reply token by token into the chat history."""
    q = Queue()
    conversation = ConversationChain(
        llm=ChatOpenAI(
            streaming=True,
            model=openai_model,
            max_tokens=max_tokens,
            temperature=creativity,
            openai_api_key=openai_api_key,
            callbacks=[StreamingGradioCallbackHandler(q)],
        ),
        memory=buffer_memory,
    )
    chat_history.append([message, ""])

    # Run the chain in a background thread so tokens can be consumed from the
    # queue as they arrive instead of waiting for the full completion.
    thread = Thread(target=conversation.predict, kwargs={"input": message})
    thread.start()

    while True:
        next_token = q.get(block=True)  # blocks until a token is available
        if next_token is job_done:
            break
        chat_history[-1][1] += next_token
        yield "", buffer_memory, chat_history  # clear the textbox, update the chat
    thread.join()


def init_buffer_memory():
    """Create a conversation memory pre-seeded with the system prompt."""
    memory = ConversationBufferMemory()
    memory.save_context({"input": DEFAULT_SYSTEM_PROMPT}, {"output": "Of course!"})
    return memory


def clear_chat():
    """Assumed behavior for the Clear History button: fresh memory, empty chat."""
    return init_buffer_memory(), []


def regenerate_response(openai_api_key, openai_model, creativity, max_tokens, buffer_memory, chat_history):
    """Assumed behavior for the Regenerate button: re-answer the last message.

    Drops the most recent exchange from both the visible history and the chain
    memory (ConversationBufferMemory keeps human/AI message pairs in
    `chat_memory.messages`), then re-runs `respond` with the same message.
    """
    if not chat_history:
        yield "", buffer_memory, chat_history
        return
    last_message = chat_history.pop()[0]
    buffer_memory.chat_memory.messages = buffer_memory.chat_memory.messages[:-2]
    yield from respond(openai_api_key, openai_model, creativity, max_tokens,
                       last_message, buffer_memory, chat_history)


with gr.Blocks(css="#component-0 { max-width: 900px; margin: auto; padding-top: 1.5rem; }") as demo:
    gr.Markdown(
        """
        ![](/home/abdulla/gradio/banner.png)
        """
    )

    with gr.Group():
        with gr.Row(visible=True) as primary_settings:
            openai_key = gr.Textbox(
                container=False,
                type="password",
                placeholder="OpenAI Key: sk-a83jv6fn3x8ndm78b5W...",
            )
            model = gr.Dropdown(
                ["gpt-4", "gpt-3.5-turbo", "gpt-3.5-turbo-16k",
                 "gpt-3.5-turbo-instruct", "text-davinci-002", "text-davinci-003"],
                container=False,
                value="gpt-3.5-turbo",
                interactive=True,
            )

    with gr.Group() as chat:
        memory = gr.State(init_buffer_memory())
        chatbot = gr.Chatbot(label='Chatbot')
        with gr.Row():
            query = gr.Textbox(
                container=False,
                show_label=False,
                placeholder='Type a message...',
                scale=10,
            )
            submit = gr.Button('Submit', variant='primary', scale=1, min_width=0)

    with gr.Row():
        regenerate = gr.Button("Regenerate")
        clear_history = gr.Button("Clear History")

    # Note: `system_prompt` and `memory_window` are displayed but not yet
    # wired into the chain above.
    with gr.Accordion(label='Advanced options', open=False):
        system_prompt = gr.Textbox(label='System prompt', value=DEFAULT_SYSTEM_PROMPT, lines=6)
        max_new_tokens = gr.Slider(
            label='Max new tokens',
            minimum=1,
            maximum=4096,
            step=1,
            value=2048,
        )
        temperature = gr.Slider(
            label='Temperature',
            minimum=0.0,
            maximum=1.0,
            step=0.1,
            value=0.0,
        )
        memory_window = gr.Slider(
            label='Conversation Memory Window',
            minimum=-1,
            maximum=10,
            step=1,
            value=-1,
            interactive=True,
        )

    # Event handling
    query.submit(respond,
                 [openai_key, model, temperature, max_new_tokens, query, memory, chatbot],
                 [query, memory, chatbot])
    submit.click(respond,
                 [openai_key, model, temperature, max_new_tokens, query, memory, chatbot],
                 [query, memory, chatbot])
    regenerate.click(regenerate_response,
                     [openai_key, model, temperature, max_new_tokens, memory, chatbot],
                     [query, memory, chatbot])
    clear_history.click(clear_chat, None, [memory, chatbot])

demo.queue().launch()
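# ---------------------------------------------------------------------------
# The local `callbacks` module imported at the top is not shown here. Below is
# a minimal sketch of what it could look like, assuming the standard LangChain
# BaseCallbackHandler hooks: `on_llm_new_token` fires once per streamed token
# and `on_llm_end` fires when generation completes. `job_done` is simply a
# sentinel object that the consumer loop in `respond` watches for.
#
#     # callbacks.py
#     from queue import Queue
#     from typing import Any
#
#     from langchain.callbacks.base import BaseCallbackHandler
#
#     job_done = object()  # sentinel: generation has finished
#
#     class StreamingGradioCallbackHandler(BaseCallbackHandler):
#         def __init__(self, q: Queue):
#             self.q = q
#
#         def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
#             self.q.put(token)  # forward each new token to the UI thread
#
#         def on_llm_end(self, response: Any, **kwargs: Any) -> None:
#             self.q.put(job_done)  # signal end of stream
# ---------------------------------------------------------------------------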