Spaces:

aquibmoin
/

aimlify2

Sleeping

File size: 3,989 Bytes

561faf0
 
 
 
 
8c7f129
561faf0
 
 
8c7f129
561faf0
 
 
 
 
 
 
 
 
 
 
 
2cec527
561faf0
bc5544c
561faf0
bc5544c
8c7f129
 
f727ca3
 
af55e76
296ef5c
e783c56
af55e76
5f35869
561faf0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
296ef5c
561faf0
 
 
 
 
9193677
561faf0

import streamlit as st
import replicate
import os

# Page configuration; this is the first Streamlit call the script makes.
st.set_page_config(page_title="🦙💬 Meta Llama Inference")

# Display name offered in the model picker -> Replicate model identifier.
LLAMA_MODELS = {
    'Llama2-7B': 'meta/llama-2-7b-chat',
    'Llama2-13B': 'meta/llama-2-13b-chat',
    'Llama3-8B-Instruct': 'meta/meta-llama-3-8b-instruct',
    'Llama3-70B-Instruct': 'meta/meta-llama-3-70b-instruct',
}

# Sidebar: Replicate credentials, model choice, system prompt and sampling
# parameters. The names bound here (replicate_api, llm, user_input,
# temperature, top_p, max_length) are read by the code further down.
with st.sidebar:
    st.title('🦙💬 Meta Llama Inference')
    if 'REPLICATE_API_TOKEN' in st.secrets:
        st.success('API key already provided!', icon='✅')
        replicate_api = st.secrets['REPLICATE_API_TOKEN']
    else:
        replicate_api = st.text_input('Enter Replicate API token:', type='password')
        # Shape check only: 'r8_' prefix and 40 characters in total.
        token_looks_valid = replicate_api.startswith('r8_') and len(replicate_api) == 40
        if token_looks_valid:
            st.success('Proceed to entering your prompt message!', icon='👉')
        else:
            st.warning('Please enter your credentials!', icon='⚠️')
    # Exported even when the shape check fails; presumably the replicate
    # client picks the token up from the environment -- TODO confirm.
    os.environ['REPLICATE_API_TOKEN'] = replicate_api

    st.subheader('Models and parameters')
    selected_model = st.selectbox('Choose a Meta Llama model', list(LLAMA_MODELS), key='selected_model')
    llm = LLAMA_MODELS[selected_model]

    st.subheader("System Prompt")
    # height was 10; recent Streamlit releases reject text_area heights
    # below 68 px, so use the smallest accepted value.
    user_input = st.text_area("Context for Fine-tuning:", placeholder="e.g. You are a Space Mission Analyst...", height=68)

    temperature = st.slider('temperature', min_value=0.01, max_value=1.0, value=0.1, step=0.01)
    top_p = st.slider('top_p', min_value=0.01, max_value=1.0, value=0.9, step=0.01)
    max_length = st.slider('max_length', min_value=32, max_value=1000, value=400, step=8)

# Seed the conversation with the assistant greeting the first time the
# session runs; later reruns keep whatever history is already stored.
if "messages" not in st.session_state:
    st.session_state["messages"] = [
        {"role": "assistant", "content": "How may I assist you today?"},
    ]

# Replay the stored conversation, one chat bubble per message.
for message in st.session_state["messages"]:
    st.chat_message(message["role"]).write(message["content"])

def clear_chat_history():
    """Reset the stored conversation to the single assistant greeting."""
    greeting = {"role": "assistant", "content": "How may I assist you today?"}
    st.session_state["messages"] = [greeting]


# Sidebar button that wipes the conversation through the callback above.
with st.sidebar:
    st.button('Clear Chat History', on_click=clear_chat_history)

# Function for generating LLaMA2 response. Refactored from https://github.com/a16z-infra/llama2-chatbot
def generate_llama2_response(prompt_input):
    """Send the conversation so far to the selected model on Replicate.

    The prompt is the optional system prompt (module-level ``user_input``)
    followed by every stored chat message rendered as ``User: ...`` or
    ``Assistant: ...`` turns, and it ends with an open ``Assistant: `` turn
    for the model to complete. Turns are separated by blank lines.

    Also reads the module-level ``llm``, ``temperature``, ``top_p`` and
    ``max_length`` values chosen in the sidebar.

    Args:
        prompt_input: Latest user message. It is added as a final ``User``
            turn only when the stored history does not already end with it;
            ``None`` or an empty string adds nothing.

    Returns:
        Whatever ``replicate.run`` returns for the model in ``llm``; the
        caller iterates over it and concatenates the pieces.
    """
    history = st.session_state.messages

    # Keep the system prompt as its own paragraph instead of gluing it
    # directly onto the first turn.
    turns = [user_input] if user_input else []
    for dict_message in history:
        speaker = "User" if dict_message["role"] == "user" else "Assistant"
        turns.append(f"{speaker}: {dict_message['content']}")

    # The script stores the new user message in the history before calling
    # this function, so appending prompt_input again would send it twice.
    already_recorded = (
        bool(history)
        and history[-1]["role"] == "user"
        and history[-1]["content"] == prompt_input
    )
    if prompt_input and not already_recorded:
        turns.append(f"User: {prompt_input}")

    # Open turn for the model to fill in.
    turns.append("Assistant: ")

    output = replicate.run(
        llm,
        input={
            "prompt": "\n\n".join(turns),
            "temperature": temperature,
            "top_p": top_p,
            "max_length": max_length,
            "repetition_penalty": 1,
        },
    )
    return output

# Read the chat box (disabled until a Replicate token is available); when the
# user submitted something, record it in the history and echo it.
prompt = st.chat_input(disabled=not replicate_api)
if prompt:
    st.session_state["messages"].append(dict(role="user", content=prompt))
    st.chat_message("user").write(prompt)

# Generate a new response if last message is not from assistant
if st.session_state.messages[-1]["role"] != "assistant":
    # Take the pending prompt from the stored history rather than from the
    # chat-input variable: that variable is None on any rerun not triggered
    # by the chat box (e.g. after an earlier generation failed and left a
    # user message last), which would put the text "None" into the prompt.
    pending_prompt = st.session_state.messages[-1]["content"]
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            response = generate_llama2_response(pending_prompt)
            placeholder = st.empty()
            full_response = ''
            # Re-render the growing reply after every piece so it appears
            # to stream in.
            for item in response:
                full_response += item
                placeholder.markdown(full_response)
            placeholder.markdown(full_response)
    # Persist the finished reply so it is replayed on the next rerun.
    message = {"role": "assistant", "content": full_response}
    st.session_state.messages.append(message)