kz919 akhaliq HF staff committed on
Commit
aee6751
1 Parent(s): fe70469

Fix Gradio demo issue and stop using the chatbot component (#3)

Browse files

- Fix Gradio demo issue and stop using the chatbot component (eb37cc8da5a2a15ba1535f80e1ced3bd59b95d14)


Co-authored-by: AK <[email protected]>

Files changed (1) hide show
  1. app.py +177 -170
app.py CHANGED
@@ -1,16 +1,8 @@
1
- import os
2
- import streamlit as st
3
- from openai import OpenAI
4
  import time
5
  import re
6
-
7
- # Set up API key
8
- API_KEY = os.getenv("API_KEY")
9
- URL = os.getenv("URL")
10
- client = OpenAI(
11
- api_key=API_KEY,
12
- base_url=URL
13
- )
14
 
15
  # Available models
16
  MODELS = [
@@ -19,181 +11,196 @@ MODELS = [
19
  "Meta-Llama-3.1-8B-Instruct"
20
  ]
21
 
22
- # Available search strategies
23
- SEARCH_STRATEGY = [
24
- "None",
25
- "Greedy-Best-Score",
26
- "Iterative-Refinement",
27
- "Monte-Carlo-Tree-Search"
28
- ]
29
 
30
  def chat_with_ai(message, chat_history, system_prompt):
31
  messages = [
32
  {"role": "system", "content": system_prompt},
33
  ]
34
-
35
- for human, ai, _ in chat_history:
36
  messages.append({"role": "user", "content": human})
37
  messages.append({"role": "assistant", "content": ai})
38
-
39
  messages.append({"role": "user", "content": message})
40
-
41
  return messages
42
 
43
- def respond(message, chat_history, model, system_prompt, thinking_budget):
44
- messages = chat_with_ai(message, chat_history, system_prompt.format(budget = thinking_budget))
45
- response = ""
 
46
  start_time = time.time()
47
- with st.spinner("AI is thinking..."):
48
- for chunk in client.chat.completions.create(
 
 
49
  model=model,
50
  messages=messages,
51
- stream=True
52
- ):
53
- content = chunk.choices[0].delta.content or ""
54
- response += content
55
- yield response, time.time() - start_time
56
-
57
- def parse_and_display_response(response):
58
- # Extract answer and reflection
 
 
 
 
59
  answer_match = re.search(r'<answer>(.*?)</answer>', response, re.DOTALL)
60
  reflection_match = re.search(r'<reflection>(.*?)</reflection>', response, re.DOTALL)
61
-
62
  answer = answer_match.group(1).strip() if answer_match else ""
63
  reflection = reflection_match.group(1).strip() if reflection_match else ""
64
-
65
- # Remove answer, reflection, and final reward from the main response
66
- response = re.sub(r'<answer>.*?</answer>', '', response, flags=re.DOTALL)
67
- response = re.sub(r'<reflection>.*?</reflection>', '', response, flags=re.DOTALL)
68
- response = re.sub(r'<reward>.*?</reward>\s*$', '', response, flags=re.DOTALL)
69
-
70
- # Extract and display steps
71
  steps = re.findall(r'<step>(.*?)</step>', response, re.DOTALL)
72
-
73
- with st.expander("Show thinking process", expanded=False):
74
- for i, step in enumerate(steps, 1):
75
- st.markdown(f"**Step {i}:**")
76
- st.write(step.strip())
77
- st.markdown("---")
78
-
79
- # Display answer and reflection
80
- if answer:
81
- st.markdown("### Answer:")
82
- st.write(answer)
83
-
84
- if reflection:
85
- st.markdown("### Reflection:")
86
- st.write(reflection)
87
-
88
- def display_message_with_code_blocks(message):
89
- # First, check if the message contains the special tags
90
- if '<step>' in message or '<answer>' in message or '<reflection>' in message:
91
- parse_and_display_response(message)
92
- else:
93
- # If not, use the original display logic
94
- parts = re.split(r'(```[\s\S]*?```)', message)
95
-
96
- for part in parts:
97
- if part.startswith('```') and part.endswith('```'):
98
- # This is a code block
99
- code = part.strip('`').strip()
100
- lang = code.split('\n')[0] if '\n' in code else ''
101
- code = '\n'.join(code.split('\n')[1:]) if lang else code
102
- st.code(code, language=lang, line_numbers=True)
103
- else:
104
- # This is regular text
105
- st.write(part)
106
-
107
- def main():
108
- st.set_page_config(page_title="AI Chatbot", layout="wide")
109
-
110
- st.title("Llama3.1-Instruct-O1")
111
- st.markdown("<a href='https://sambanova.ai/fast-api?api_ref=907266' target='_blank'>Powered by Llama3.1 models through SN Cloud</a>", unsafe_allow_html=True)
112
-
113
- if "chat_history" not in st.session_state:
114
- st.session_state.chat_history = []
115
-
116
- col1, col2 = st.columns([1, 1])
117
-
118
- with col1:
119
- model = st.selectbox("Select Model", MODELS, index=0)
120
- thinking_budget = st.slider("Thinking Budget", 1, 100, 1, help="Control how much it thinks, pick between 1 to 100 inclusive")
121
-
122
- with col2:
123
- system_prompt = st.text_area(
124
- "System Prompt",
125
- value="""
126
- You are a helpful assistant in normal conversation.
127
- When given a problem to solve, you are an expert problem-solving assistant. Your task is to provide a detailed, step-by-step solution to a given question. Follow these instructions carefully:
128
-
129
- 1. Read the given question carefully and reset counter between <count> and </count> to {budget}
130
- 2. Generate a detailed, logical step-by-step solution.
131
- 3. Enclose each step of your solution within <step> and </step> tags.
132
- 4. You are allowed to use at most {budget} steps (starting budget), keep track of it by counting down within tags <count> </count>, STOP GENERATING MORE STEPS when hitting 0, you don't have to use all of them.
133
- 5. Do a self-reflection when you are unsure about how to proceed, based on the self-reflection and reward, decides whether you need to return to the previous steps.
134
- 6. After completing the solution steps, reorganize and synthesize the steps into the final answer within <answer> and </answer> tags.
135
- 7. Provide a critical, honest and subjective self-evaluation of your reasoning process within <reflection> and </reflection> tags.
136
- 8. Assign a quality score to your solution as a float between 0.0 (lowest quality) and 1.0 (highest quality), enclosed in <reward> and </reward> tags.
137
-
138
- Example format:
139
- <count> [starting budget] </count>
140
-
141
- <step> [Content of step 1] </step>
142
- <count> [remaining budget] </count>
143
-
144
- <step> [Content of step 2] </step>
145
- <reflection> [Evaluation of the steps so far] </reflection>
146
- <reward> [Float between 0.0 and 1.0] </reward>
147
- <count> [remaining budget] </count>
148
-
149
- <step> [Content of step 3 or Content of some previous step] </step>
150
- <count> [remaining budget] </count>
151
-
152
- ...
153
-
154
- <step> [Content of final step] </step>
155
- <count> [remaining budget] </count>
156
-
157
- <answer> [Final Answer] </answer>
158
-
159
- <reflection> [Evaluation of the solution] </reflection>
160
-
161
- <reward> [Float between 0.0 and 1.0] </reward>
162
- """,
163
- height=200
164
  )
165
-
166
- st.markdown("---")
167
-
168
- for human, ai, thinking_time in st.session_state.chat_history:
169
- with st.chat_message("human"):
170
- st.write(human)
171
- with st.chat_message("ai"):
172
- display_message_with_code_blocks(ai)
173
- st.caption(f"Thinking time: {thinking_time:.2f} s")
174
-
175
- message = st.chat_input("Type your message here...")
176
-
177
- if message:
178
- with st.chat_message("human"):
179
- st.write(message)
180
-
181
- with st.chat_message("ai"):
182
- response_placeholder = st.empty()
183
- time_placeholder = st.empty()
184
- for response, elapsed_time in respond(message, st.session_state.chat_history, model, system_prompt, thinking_budget):
185
- response_placeholder.markdown(response)
186
- time_placeholder.caption(f"Thinking time: {elapsed_time:.2f} s")
187
- response_placeholder.empty()
188
- time_placeholder.empty()
189
- display_message_with_code_blocks(response)
190
- time_placeholder.caption(f"Thinking time: {elapsed_time:.2f} s")
191
-
192
- st.session_state.chat_history.append((message, response, elapsed_time))
193
-
194
- if st.button("Clear Chat"):
195
- st.session_state.chat_history = []
196
- st.experimental_rerun()
197
-
198
- if __name__ == "__main__":
199
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import openai
 
3
  import time
4
  import re
5
+ import os
 
 
 
 
 
 
 
6
 
7
  # Available models
8
  MODELS = [
 
11
  "Meta-Llama-3.1-8B-Instruct"
12
  ]
13
 
14
def create_client(api_key, api_base="https://api.sambanova.ai/v1"):
    """Configure the module-level ``openai`` client for an OpenAI-compatible API.

    Args:
        api_key: Secret key to send with subsequent requests.
        api_base: Base URL of the API. Defaults to the SambaNova endpoint
            that was previously hard-coded here, so existing one-argument
            calls behave exactly as before.

    Returns:
        None. The settings are written onto the ``openai`` module itself,
        so they apply to every later call made through that module.
    """
    openai.api_key = api_key
    openai.api_base = api_base
 
 
 
 
17
 
def chat_with_ai(message, chat_history, system_prompt):
    """Build the chat-completion message list for one new user turn.

    Args:
        message: The user's new message.
        chat_history: Iterable of ``(user_text, assistant_text)`` pairs from
            earlier turns, oldest first.
        system_prompt: Text placed in the leading ``system`` message.

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts: the system
        message, each past exchange in order, then the new user message.
    """
    conversation = [{"role": "system", "content": system_prompt}]

    # Replay earlier exchanges so the model sees the whole conversation.
    for user_turn, assistant_turn in chat_history:
        conversation += [
            {"role": "user", "content": user_turn},
            {"role": "assistant", "content": assistant_turn},
        ]

    conversation.append({"role": "user", "content": message})
    return conversation
30
 
31
def respond(message, chat_history, model, system_prompt, thinking_budget, api_key):
    """Send one chat turn to the model and yield its reply.

    Args:
        message: The user's new message.
        chat_history: Earlier ``(user_text, assistant_text)`` pairs.
        model: Model name passed to the chat-completion endpoint.
        system_prompt: Prompt template; ``{budget}`` is filled in with
            ``thinking_budget`` via ``str.format``.
        thinking_budget: Value substituted for ``{budget}``.
        api_key: Key handed to ``create_client`` before the request.

    Yields:
        Exactly one ``(text, elapsed_seconds)`` tuple. ``text`` is the
        model's reply, or a string beginning with ``"Error:"`` if building
        or sending the request failed.
    """
    print("Starting respond function...")
    start_time = time.time()

    try:
        # Formatting happens inside the try so that a malformed template
        # (unknown placeholder, stray brace) is reported through the same
        # "Error: ..." channel as an API failure instead of escaping the
        # generator as an uncaught exception.
        formatted_prompt = system_prompt.format(budget=thinking_budget)
        messages = chat_with_ai(message, chat_history, formatted_prompt)
        create_client(api_key)  # Sets api_key and api_base globally

        print("Calling OpenAI API...")
        completion = openai.ChatCompletion.create(
            model=model,
            messages=messages,
            stream=False,  # Synchronous: the whole reply arrives at once
        )
        response = completion.choices[0].message['content']
        thinking_time = time.time() - start_time
        print("Response received from OpenAI API.")
    except Exception as e:
        # Boundary handler: the UI expects a string, never an exception.
        error_message = f"Error: {str(e)}"
        print(error_message)
        yield error_message, time.time() - start_time
    else:
        yield response, thinking_time
52
+
53
def parse_response(response):
    """Extract the tagged sections from a model reply.

    Args:
        response: Raw reply text. ``None`` or an empty string is treated as
            a reply with no tags.

    Returns:
        A tuple ``(answer, reflection, steps)``:

        * ``answer`` - stripped text of the first ``<answer>`` element,
          or ``""`` if there is none.
        * ``reflection`` - stripped text of the *last* ``<reflection>``
          element, or ``""`` if there is none.
        * ``steps`` - stripped text of every ``<step>`` element, in order.
    """
    if not response:
        return "", "", []

    answer_match = re.search(r'<answer>(.*?)</answer>', response, re.DOTALL)
    reflections = re.findall(r'<reflection>(.*?)</reflection>', response, re.DOTALL)

    answer = answer_match.group(1).strip() if answer_match else ""
    # The prompt format allows intermediate <reflection> blocks between
    # steps; the one that evaluates the finished solution comes last, so
    # take the last match rather than the first.
    reflection = reflections[-1].strip() if reflections else ""

    # Strip steps the same way answer and reflection are stripped.
    steps = [step.strip() for step in re.findall(r'<step>(.*?)</step>', response, re.DOTALL)]

    return answer, reflection, steps
63
+
64
def process_chat(message, history, model, system_prompt, thinking_budget, api_key):
    """Run one chat turn and return the text to show in the UI.

    Args:
        message: The user's new message.
        history: Mutable list of ``(user_text, assistant_text)`` pairs. On
            success the new exchange is appended to it in place.
        model: Model name forwarded to ``respond``.
        system_prompt: Prompt template containing ``{budget}``.
        thinking_budget: Value substituted for ``{budget}``.
        api_key: API key; an empty value short-circuits with a hint.

    Returns:
        A display string: the formatted reply, or an explanatory message
        when the key is missing, the template is invalid, or the request
        failed. ``history`` is left untouched in all of the failure cases.
    """
    print(f"Received message: {message}")
    if not api_key:
        print("API key missing")
        return "Please provide your API Key before starting the chat."

    # Validate the template up front so the user gets a specific message.
    # The formatted result is deliberately discarded: respond() applies the
    # budget itself, and formatting twice would break prompts that use
    # escaped braces ("{{" becomes "{" after the first pass).
    try:
        system_prompt.format(budget=thinking_budget)
    except KeyError as e:
        error_msg = f"System prompt missing placeholder: {str(e)}"
        print(error_msg)
        return error_msg
    except (IndexError, ValueError) as e:
        # "{}" raises IndexError; an unbalanced brace raises ValueError.
        error_msg = f"Invalid system prompt: {e}"
        print(error_msg)
        return error_msg

    full_response = ""
    thinking_time = 0

    for response, elapsed_time in respond(message, history, model, system_prompt, thinking_budget, api_key):
        print(f"Received response: {response}")
        full_response = response or ""  # guard against a null reply body
        thinking_time = elapsed_time

    if full_response.startswith("Error:"):
        return full_response

    answer, reflection, steps = parse_response(full_response)

    if answer or reflection or steps:
        formatted_response = f"**Answer:** {answer}\n\n**Reflection:** {reflection}\n\n**Thinking Steps:**\n"
        for i, step in enumerate(steps, 1):
            formatted_response += f"**Step {i}:** {step.strip()}\n"
    else:
        # Plain conversational replies carry no tags; show them as-is
        # instead of an empty Answer/Reflection skeleton.
        formatted_response = f"{full_response.strip()}\n"

    formatted_response += f"\n**Thinking time:** {thinking_time:.2f} s"

    print(f"Appended response: {formatted_response}")
    # Keep the model's own reply in the history: it is replayed to the model
    # on the next turn, and the UI markup/timing line would pollute that.
    history.append((message, full_response))
    return formatted_response
99
+
100
# Prompt template pre-filled into the "System Prompt" box. It is run through
# str.format(budget=...) before use, so "{budget}" is the only placeholder it
# may contain; any other literal brace would have to be doubled.
default_system_prompt = """
You are a helpful assistant in normal conversation.
When given a problem to solve, you are an expert problem-solving assistant. Your task is to provide a detailed, step-by-step solution to a given question. Follow these instructions carefully:

1. Read the given question carefully and reset counter between <count> and </count> to {budget}
2. Generate a detailed, logical step-by-step solution.
3. Enclose each step of your solution within <step> and </step> tags.
4. You are allowed to use at most {budget} steps (starting budget), keep track of it by counting down within tags <count> </count>, STOP GENERATING MORE STEPS when hitting 0, you don't have to use all of them.
5. Do a self-reflection when you are unsure about how to proceed, based on the self-reflection and reward, decides whether you need to return to the previous steps.
6. After completing the solution steps, reorganize and synthesize the steps into the final answer within <answer> and </answer> tags.
7. Provide a critical, honest and subjective self-evaluation of your reasoning process within <reflection> and </reflection> tags.
8. Assign a quality score to your solution as a float between 0.0 (lowest quality) and 1.0 (highest quality), enclosed in <reward> and </reward> tags.

Example format:
<count> [starting budget] </count>

<step> [Content of step 1] </step>
<count> [remaining budget] </count>

<step> [Content of step 2] </step>
<reflection> [Evaluation of the steps so far] </reflection>
<reward> [Float between 0.0 and 1.0] </reward>
<count> [remaining budget] </count>

<step> [Content of step 3 or Content of some previous step] </step>
<count> [remaining budget] </count>

...

<step> [Content of final step] </step>
<count> [remaining budget] </count>

<answer> [Final Answer] </answer>

<reflection> [Evaluation of the solution] </reflection>

<reward> [Float between 0.0 and 1.0] </reward>
"""
139
+
140
# Gradio UI: settings on top, a single message box, and a read-only response
# box (a plain Textbox is used for output rather than a Chatbot component).
with gr.Blocks() as demo:
    gr.Markdown("# Llama3.1-Instruct-O1")
    gr.Markdown("[Powered by Llama3.1 models through SN Cloud](https://sambanova.ai/fast-api?api_ref=907266)")

    with gr.Row():
        api_key = gr.Textbox(
            label="API Key",
            type="password",
            placeholder="Enter your API key here"
        )

    with gr.Row():
        model = gr.Dropdown(
            choices=MODELS,
            label="Select Model",
            value=MODELS[0]
        )
        thinking_budget = gr.Slider(
            minimum=1,
            maximum=100,
            value=10,
            step=1,
            label="Thinking Budget"
        )

    system_prompt = gr.Textbox(
        label="System Prompt",
        value=default_system_prompt,
        lines=10
    )

    msg = gr.Textbox(
        label="Type your message here...",
        placeholder="Enter your message..."
    )
    submit = gr.Button("Submit")
    clear = gr.Button("Clear Chat")

    output = gr.Textbox(
        label="Response",
        lines=20,
        interactive=False
    )

    # Initialize chat history. The State component is given a name so it can
    # be listed as an event output; without that, "Clear Chat" had no way to
    # reset the stored conversation.
    chat_history = []
    history_state = gr.State(chat_history)

    def handle_submit(message, history, model, system_prompt, thinking_budget, api_key):
        """Run one turn; return the reply text and the updated history."""
        response = process_chat(message, history, model, system_prompt, thinking_budget, api_key)
        # process_chat appends to `history` in place; returning it makes the
        # state update explicit instead of relying on shared mutation.
        return response, history

    def handle_clear():
        """Blank the response box and start a fresh conversation."""
        return "", []

    submit.click(
        handle_submit,
        inputs=[msg, history_state, model, system_prompt, thinking_budget, api_key],
        outputs=[output, history_state]
    )

    clear.click(
        handle_clear,
        inputs=None,
        outputs=[output, history_state]
    )

demo.launch()