import os

import streamlit as st
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Read the Hugging Face API token from the environment. It may be None for
# public repos; hf_hub_download accepts token=None in that case. (Assigning
# os.getenv() directly into os.environ would raise a TypeError when the
# variable is unset.)
hf_token = os.getenv("HUGGINGFACE_TOKEN")

model_name_or_path = "mmnga/ELYZA-japanese-Llama-2-7b-fast-instruct-gguf"
model_basename = "ELYZA-japanese-Llama-2-7b-fast-instruct-q5_K_M.gguf"

# Download the GGUF model file from the Hugging Face Hub (cached locally
# after the first run).
model_path = hf_hub_download(
    repo_id=model_name_or_path,
    filename=model_basename,
    revision="main",
    token=hf_token,
)

# Load the model with a 5120-token context window.
llama = Llama(model_path, n_ctx=5120)


def predict(messages):
    # Get the response from Llama with streaming enabled, yielding the
    # accumulated text after each new chunk arrives.
    streamer = llama.create_chat_completion(messages, stream=True, max_tokens=512)
    partial_message = ""
    for msg in streamer:
        delta = msg["choices"][0]["delta"]
        if "content" in delta:
            partial_message += delta["content"]
            yield partial_message


def main():
    st.title("Chat with Elyza!")

    # Session state for retaining messages across reruns
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Display chat messages from history on app rerun
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # Input for the user message
    user_message = st.chat_input("Your Message")

    # React to user input
    if user_message:
        # Display the user message in a chat message container
        with st.chat_message("user"):
            st.markdown(user_message)
        # Add the user message to the chat history
        st.session_state.messages.append({"role": "user", "content": user_message})

        with st.chat_message("assistant"):
            message_placeholder = st.empty()
            full_response = ""
            # predict() yields the accumulated response so far, so each
            # value replaces (rather than extends) full_response.
            for partial_response in predict(
                [{"role": m["role"], "content": m["content"]} for m in st.session_state.messages]
            ):
                full_response = partial_response
                message_placeholder.markdown(full_response + " ❚ ")
            message_placeholder.markdown(full_response)
        st.session_state.messages.append({"role": "assistant", "content": full_response})


if __name__ == "__main__":
    main()
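# Usage (a minimal sketch; assumes this script is saved as app.py and that
# the three packages below are installed -- the filename and install step are
# not part of the original script):
#
#   pip install streamlit llama-cpp-python huggingface_hub
#   export HUGGINGFACE_TOKEN=...   # optional; this model repo is public
#   streamlit run app.py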