Update app.py

app.py
CHANGED
@@ -118,17 +118,82 @@ def convert_history_to_token(history: List[Tuple[str, str]]):
    input_token = tok(text, return_tensors="pt", **tokenizer_kwargs).input_ids
    return input_token


def bot(history, temperature, top_p, top_k, repetition_penalty, conversation_id):
-
-
    if input_ids.shape[1] > 2000:
        history = [history[-1]]
-       input_ids = convert_history_to_token(history)

-
    generate_kwargs = dict(
        input_ids=input_ids,
-       max_new_tokens=256,
        temperature=temperature,
        do_sample=temperature > 0.0,
        top_p=top_p,
@@ -136,23 +201,32 @@ def bot(history, temperature, top_p, top_k, repetition_penalty, conversation_id)
        repetition_penalty=repetition_penalty,
        streamer=streamer,
    )
    stream_complete = Event()

    def generate_and_signal_complete():
        ov_model.generate(**generate_kwargs)
        stream_complete.set()

-   Thread(target=generate_and_signal_complete)
    partial_text = ""
    for new_text in streamer:
-       partial_text
-       history
        yield history


def request_cancel():
    ov_model.request.cancel()


# Gradio setup and launch
-demo = make_demo(run_fn=bot, stop_fn=request_cancel, title=f"OpenVINO
if __name__ == "__main__":
    demo.launch(debug=True, share=True, server_name="0.0.0.0", server_port=7860)

    input_token = tok(text, return_tensors="pt", **tokenizer_kwargs).input_ids
    return input_token

+# Initialize the search tool
+search = DuckDuckGoSearchRun()
+
+# Function to retrieve and format search results based on user input
+def fetch_search_results(query: str) -> str:
+    search_results = search.invoke(query)
+    # Displaying search results for debugging
+    print("Search results: ", search_results)
+    return f"Relevant and recent information:\n{search_results}"
+
+# Function to decide if a search is needed based on the user query
+def should_use_search(query: str) -> bool:
+    # Simple heuristic, can be extended with more advanced intent analysis
+    search_keywords = ["latest", "news", "update", "which", "who", "what", "when", "why", "how", "recent", "result", "tell", "explain",
+                       "announcement", "bulletin", "report", "brief", "insight", "disclosure", "update",
+                       "release", "memo", "headline", "current", "ongoing", "fresh", "upcoming", "immediate",
+                       "recently", "new", "now", "in-progress", "inquiry", "query", "ask", "investigate",
+                       "explore", "seek", "clarify", "confirm", "discover", "learn", "describe", "define",
+                       "illustrate", "outline", "interpret", "expound", "detail", "summarize", "elucidate",
+                       "break down", "outcome", "effect", "consequence", "finding", "achievement", "conclusion",
+                       "product", "performance", "resolution"]
+    return any(keyword in query.lower() for keyword in search_keywords)
+
+# Generate prompt for model with optional search context
+def construct_model_prompt(user_query: str, search_context: str, history: List[Tuple[str, str]]) -> str:
+    # Simple instruction for the model to prioritize search information if available
+    instructions = (
+        "If relevant information is provided below, use it to give an accurate and concise answer. If there is no relevant information available, please rely on your general knowledge and indicate that no recent or specific information is available to answer."
+    )
+
+    # Build the prompt with instructions, search context, and user query
+    prompt = f"{instructions}\n\n"
+    if search_context:
+        prompt += f"{search_context}\n\n"  # Include search context prominently at the top
+
+    # Add the user's query
+    prompt += f"{user_query} ?\n\n"
+
+    # Optionally add recent history for context, without labels
+    # if history:
+    #     prompt += "Recent conversation:\n"
+    #     for user_msg, assistant_msg in history[:-1]:  # Exclude the last message to prevent duplication
+    #         prompt += f"{user_msg}\n{assistant_msg}\n"
+
+    return prompt
+
+
def bot(history, temperature, top_p, top_k, repetition_penalty, conversation_id):
+    """
+    Main callback function for running chatbot on submit button click.
+    """
+    user_query = history[-1][0]
+    search_context = ""
+
+    # Decide if search is required based on the user query
+    if should_use_search(user_query):
+        search_context = fetch_search_results(user_query)
+        prompt = construct_model_prompt(user_query, search_context, history)
+        input_ids = tok(prompt, return_tensors="pt", truncation=True, max_length=2500).input_ids
+    else:
+        # If no search context, use the original logic with tokenization
+        prompt = construct_model_prompt(user_query, "", history)
+        input_ids = convert_history_to_token(history)
+
+    # Ensure input length does not exceed a threshold (e.g., 2000 tokens)
    if input_ids.shape[1] > 2000:
+        # If input exceeds the limit, only use the most recent conversation
        history = [history[-1]]

+    # Streamer for model response generation
+    streamer = TextIteratorStreamer(tok, timeout=4600.0, skip_prompt=True, skip_special_tokens=True)
+
    generate_kwargs = dict(
        input_ids=input_ids,
+        max_new_tokens=256,  # Adjust this as needed
        temperature=temperature,
        do_sample=temperature > 0.0,
        top_p=top_p,
@@ -136,23 +201,32 @@ def bot(history, temperature, top_p, top_k, repetition_penalty, conversation_id)
        repetition_penalty=repetition_penalty,
        streamer=streamer,
    )
+
+    if stop_tokens is not None:
+        generate_kwargs["stopping_criteria"] = StoppingCriteriaList(stop_tokens)
+
+    # Event to signal when streaming is complete
    stream_complete = Event()

    def generate_and_signal_complete():
        ov_model.generate(**generate_kwargs)
        stream_complete.set()

+    t1 = Thread(target=generate_and_signal_complete)
+    t1.start()
+
+    # Initialize an empty string to store the generated text
    partial_text = ""
    for new_text in streamer:
+        partial_text = text_processor(partial_text, new_text)
+        # Update the last entry in the original history with the response
+        history[-1] = (user_query, partial_text)
        yield history

def request_cancel():
    ov_model.request.cancel()

# Gradio setup and launch
+demo = make_demo(run_fn=bot, stop_fn=request_cancel, title=f"OpenVINO Search & Reasoning Chatbot", language=model_language_value)
if __name__ == "__main__":
    demo.launch(debug=True, share=True, server_name="0.0.0.0", server_port=7860)
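For readers of this change: the new helpers form a small pipeline where a keyword heuristic decides whether the query needs fresh information, DuckDuckGo is queried if so, and the results are folded into the prompt ahead of the user's question. The following is a minimal standalone sketch of that flow, not the Space's code: it assumes langchain_community and duckduckgo-search are installed, trims the keyword list, and uses a hypothetical build_prompt helper and an example query in place of construct_model_prompt and the real chat history.

from langchain_community.tools import DuckDuckGoSearchRun

search = DuckDuckGoSearchRun()

def should_use_search(query: str) -> bool:
    # Trimmed version of the diff's keyword heuristic
    keywords = ["latest", "news", "recent", "current", "update"]
    return any(keyword in query.lower() for keyword in keywords)

def build_prompt(query: str, context: str) -> str:
    # Hypothetical, simplified stand-in for construct_model_prompt
    instructions = "Use the information below if it is relevant; otherwise answer from general knowledge."
    prompt = f"{instructions}\n\n"
    if context:
        prompt += f"Relevant and recent information:\n{context}\n\n"
    return prompt + query

query = "What is the latest OpenVINO news?"  # example query, not taken from the Space
context = search.invoke(query) if should_use_search(query) else ""
print(build_prompt(query, context))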
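The bot() changes keep the file's existing streamed-generation pattern: generation runs on a worker thread while the caller iterates the streamer and yields partial text back to Gradio. The sketch below illustrates that pattern in isolation with a generic Hugging Face model as a placeholder; the model name, prompt, and token budget are illustrative only, and the Space itself uses its OpenVINO model (ov_model) instead.

from threading import Event, Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tok = AutoTokenizer.from_pretrained("gpt2")  # placeholder model for illustration
model = AutoModelForCausalLM.from_pretrained("gpt2")

input_ids = tok("OpenVINO is", return_tensors="pt").input_ids
streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True)
stream_complete = Event()

def generate_and_signal_complete():
    # Run generation on a worker thread; decoded tokens are pushed into the streamer
    model.generate(input_ids=input_ids, max_new_tokens=32, streamer=streamer)
    stream_complete.set()

Thread(target=generate_and_signal_complete).start()

partial_text = ""
for new_text in streamer:  # consume decoded chunks as they arrive
    partial_text += new_text
stream_complete.wait()
print(partial_text)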