import os

import streamlit as st
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Read the Hugging Face API token from the environment. It may be None for
# public repos; hf_hub_download accepts token=None in that case. (Assigning
# os.getenv() directly into os.environ would raise a TypeError when the
# variable is unset.)
hf_token = os.getenv("HUGGINGFACE_TOKEN")

model_name_or_path = "mmnga/ELYZA-japanese-Llama-2-7b-fast-instruct-gguf"
model_basename = "ELYZA-japanese-Llama-2-7b-fast-instruct-q5_K_M.gguf"

# Download the GGUF model file from the Hugging Face Hub (cached locally
# after the first run).
model_path = hf_hub_download(
    repo_id=model_name_or_path,
    filename=model_basename,
    revision="main",
    token=hf_token,
)

# Load the model with a 5120-token context window.
llama = Llama(model_path, n_ctx=5120)


def predict(messages):
    # Get the response from Llama with streaming enabled, yielding the
    # accumulated text after each new chunk arrives.
    streamer = llama.create_chat_completion(messages, stream=True, max_tokens=512)
    partial_message = ""
    for msg in streamer:
        delta = msg["choices"][0]["delta"]
        if "content" in delta:
            partial_message += delta["content"]
            yield partial_message


def main():
    st.title("Chat with Elyza!")

    # Session state for retaining messages across reruns
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Display chat messages from history on app rerun
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # Input for the user message
    user_message = st.chat_input("Your Message")

    # React to user input
    if user_message:
        # Display the user message in a chat message container
        with st.chat_message("user"):
            st.markdown(user_message)
        # Add the user message to the chat history
        st.session_state.messages.append({"role": "user", "content": user_message})

        with st.chat_message("assistant"):
            message_placeholder = st.empty()
            full_response = ""
            # predict() yields the accumulated response so far, so each
            # value replaces (rather than extends) full_response.
            for partial_response in predict(
                [{"role": m["role"], "content": m["content"]} for m in st.session_state.messages]
            ):
                full_response = partial_response
                message_placeholder.markdown(full_response + " ❚ ")
            message_placeholder.markdown(full_response)
        st.session_state.messages.append({"role": "assistant", "content": full_response})


if __name__ == "__main__":
    main()
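# Usage (a minimal sketch; assumes this script is saved as app.py and that
# the three packages below are installed -- the filename and install step are
# not part of the original script):
#
#   pip install streamlit llama-cpp-python huggingface_hub
#   export HUGGINGFACE_TOKEN=...   # optional; this model repo is public
#   streamlit run app.py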