"""Streamlit chat app: Llama 3.1 Instruct models with an o1-style, step-budgeted reasoning prompt."""

import os
import re
import time

import streamlit as st
from openai import OpenAI

# The API key and base URL of an OpenAI-compatible endpoint are read from the environment.
API_KEY = os.getenv("API_KEY")
URL = os.getenv("URL")

client = OpenAI(api_key=API_KEY, base_url=URL)
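
# Illustrative environment setup (the base URL below is an assumption; any
# OpenAI-compatible endpoint works):
#   export API_KEY="your-api-key"
#   export URL="https://api.sambanova.ai/v1"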

MODELS = [
    "Meta-Llama-3.1-405B-Instruct",
    "Meta-Llama-3.1-70B-Instruct",
    "Meta-Llama-3.1-8B-Instruct",
]

# Candidate search strategies for the reasoning process (defined here but not
# yet wired into the UI).
SEARCH_STRATEGY = [
    "None",
    "Greedy-Best-Score",
    "Iterative-Refinement",
    "Monte-Carlo-Tree-Search",
]


def chat_with_ai(message, chat_history, system_prompt):
    """Build the OpenAI-style message list: system prompt, prior turns, then the new user message."""
    messages = [
        {"role": "system", "content": system_prompt},
    ]

    for human, ai, _ in chat_history:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": ai})

    messages.append({"role": "user", "content": message})

    return messages
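
# For reference, the list chat_with_ai returns has this shape (values are
# illustrative):
# [
#     {"role": "system", "content": "<system prompt>"},
#     {"role": "user", "content": "<earlier question>"},
#     {"role": "assistant", "content": "<earlier answer>"},
#     {"role": "user", "content": "<new message>"},
# ]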


def respond(message, chat_history, model, system_prompt, thinking_budget):
    """Stream the model's reply, yielding (partial_response, elapsed_seconds) per chunk."""
    messages = chat_with_ai(message, chat_history, system_prompt.format(budget=thinking_budget))
    response = ""
    start_time = time.time()
    with st.spinner("AI is thinking..."):
        for chunk in client.chat.completions.create(
            model=model,
            messages=messages,
            stream=True,
        ):
            # Some backends emit keep-alive chunks with an empty choices list; skip them.
            if not chunk.choices:
                continue
            content = chunk.choices[0].delta.content or ""
            response += content
            yield response, time.time() - start_time
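
# respond() is a generator; callers drain it to stream partial output into a
# placeholder, e.g. (sketch mirroring the usage in main() below):
#   for partial, elapsed in respond(msg, history, model, prompt, budget):
#       placeholder.markdown(partial)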


def parse_and_display_response(response):
    """Render a tag-structured response: collapsible steps, then the answer and reflection."""
    answer_match = re.search(r'<answer>(.*?)</answer>', response, re.DOTALL)
    reflection_match = re.search(r'<reflection>(.*?)</reflection>', response, re.DOTALL)

    answer = answer_match.group(1).strip() if answer_match else ""
    reflection = reflection_match.group(1).strip() if reflection_match else ""

    # Remove the answer, reflection, and trailing reward blocks so only the steps remain.
    response = re.sub(r'<answer>.*?</answer>', '', response, flags=re.DOTALL)
    response = re.sub(r'<reflection>.*?</reflection>', '', response, flags=re.DOTALL)
    response = re.sub(r'<reward>.*?</reward>\s*$', '', response, flags=re.DOTALL)

    steps = re.findall(r'<step>(.*?)</step>', response, re.DOTALL)

    with st.expander("Show thinking process", expanded=False):
        for i, step in enumerate(steps, 1):
            st.markdown(f"**Step {i}:**")
            st.write(step.strip())
            st.markdown("---")

    if answer:
        st.markdown("### Answer:")
        st.write(answer)

    if reflection:
        st.markdown("### Reflection:")
        st.write(reflection)


def display_message_with_code_blocks(message):
    """Render a message: tag-structured responses go through the parser; otherwise render fenced code blocks with st.code."""
    if '<step>' in message or '<answer>' in message or '<reflection>' in message:
        parse_and_display_response(message)
    else:
        # Split the message on fenced code blocks, keeping the fences in the parts.
        parts = re.split(r'(```[\s\S]*?```)', message)

        for part in parts:
            if part.startswith('```') and part.endswith('```'):
                # Strip the backticks and peel off the optional language tag on the first line.
                code = part.strip('`').strip()
                lang = code.split('\n')[0] if '\n' in code else ''
                code = '\n'.join(code.split('\n')[1:]) if lang else code
                st.code(code, language=lang or None, line_numbers=True)
            else:
                st.write(part)


def main():
    st.set_page_config(page_title="AI Chatbot", layout="wide")

    st.title("Llama3.1-Instruct-O1")
    st.markdown("<a href='https://sambanova.ai/fast-api?api_ref=907266' target='_blank'>Powered by Llama3.1 models through SN Cloud</a>", unsafe_allow_html=True)

    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []

    col1, col2 = st.columns([1, 1])

    with col1:
        model = st.selectbox("Select Model", MODELS, index=0)
        thinking_budget = st.slider("Thinking Budget", 1, 100, 1, help="Controls how many reasoning steps the model may use (1 to 100 inclusive)")

    with col2:
        system_prompt = st.text_area(
            "System Prompt",
            value="""
You are a helpful assistant in normal conversation.
When given a problem to solve, you are an expert problem-solving assistant. Your task is to provide a detailed, step-by-step solution to a given question. Follow these instructions carefully:

1. Read the given question carefully and reset the counter between <count> and </count> to {budget}.
2. Generate a detailed, logical step-by-step solution.
3. Enclose each step of your solution within <step> and </step> tags.
4. You are allowed to use at most {budget} steps (the starting budget); keep track of it by counting down within <count> </count> tags and STOP GENERATING MORE STEPS when the count reaches 0. You do not have to use all of the steps.
5. Do a self-reflection when you are unsure how to proceed; based on the self-reflection and reward, decide whether you need to return to previous steps.
6. After completing the solution steps, reorganize and synthesize the steps into the final answer within <answer> and </answer> tags.
7. Provide a critical, honest, and subjective self-evaluation of your reasoning process within <reflection> and </reflection> tags.
8. Assign a quality score to your solution as a float between 0.0 (lowest quality) and 1.0 (highest quality), enclosed in <reward> and </reward> tags.

Example format:
<count> [starting budget] </count>

<step> [Content of step 1] </step>
<count> [remaining budget] </count>

<step> [Content of step 2] </step>
<reflection> [Evaluation of the steps so far] </reflection>
<reward> [Float between 0.0 and 1.0] </reward>
<count> [remaining budget] </count>

<step> [Content of step 3 or Content of some previous step] </step>
<count> [remaining budget] </count>

...

<step> [Content of final step] </step>
<count> [remaining budget] </count>

<answer> [Final Answer] </answer>

<reflection> [Evaluation of the solution] </reflection>

<reward> [Float between 0.0 and 1.0] </reward>
""",
            height=200
        )

    st.markdown("---")

    for human, ai, thinking_time in st.session_state.chat_history:
        with st.chat_message("human"):
            st.write(human)
        with st.chat_message("ai"):
            display_message_with_code_blocks(ai)
            st.caption(f"Thinking time: {thinking_time:.2f} s")

    message = st.chat_input("Type your message here...")

    if message:
        with st.chat_message("human"):
            st.write(message)

        with st.chat_message("ai"):
            response_placeholder = st.empty()
            time_placeholder = st.empty()
            # Defaults guard against a stream that yields nothing.
            response, elapsed_time = "", 0.0
            for response, elapsed_time in respond(message, st.session_state.chat_history, model, system_prompt, thinking_budget):
                response_placeholder.markdown(response)
                time_placeholder.caption(f"Thinking time: {elapsed_time:.2f} s")
            # Swap the raw streamed text for the fully parsed rendering.
            response_placeholder.empty()
            time_placeholder.empty()
            display_message_with_code_blocks(response)
            time_placeholder.caption(f"Thinking time: {elapsed_time:.2f} s")

        st.session_state.chat_history.append((message, response, elapsed_time))

    if st.button("Clear Chat"):
        st.session_state.chat_history = []
        st.rerun()  # replaces the deprecated st.experimental_rerun()


if __name__ == "__main__":
    main()
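
# To run locally (the filename is assumed):
#   streamlit run app.py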