|
import re |
|
from txtai import Embeddings, LLM |
|
import gradio as gr |
|
|
|
|
|
def cot(system, user):
    """
    Runs a Chain of Thought (CoT) with reflection prompt and returns the final answer.

    The caller's system prompt is wrapped with instructions telling the model to
    reason inside <thinking>/<reflection> tags and to put the user-facing answer
    inside <output> tags. Only the <output> section is returned to the caller.

    Args:
        system: base system prompt to augment with the CoT instructions
        user: user message to answer

    Returns:
        text extracted from the <output> tags, or the raw LLM response when no
        <output> section is found
    """

    # Wrap the caller's system prompt with the CoT + reflection instructions.
    # NOTE: this prompt text is runtime behavior and is kept verbatim.
    system = f"""
{system}

You are an AI assistant that uses a Chain of Thought (CoT) approach with reflection to answer queries. Follow these steps:

1. Think through the problem step by step within the <thinking> tags.
2. Reflect on your thinking to check for any errors or improvements within the <reflection> tags.
3. Make any necessary adjustments based on your reflection.
4. Provide your final, concise answer within the <output> tags.

Important: The <thinking> and <reflection> sections are for your internal reasoning process only.
Do not include any part of the final answer in these sections.
The actual response to the query must be entirely contained within the <output> tags.

Use the following format for your response:
<thinking>
[Your step-by-step reasoning goes here. This is your internal thought process, not the final answer.]
<reflection>
[Your reflection on your reasoning, checking for errors or improvements]
</reflection>
[Any adjustments to your thinking based on your reflection]
</thinking>
<output>
[Your final, concise answer to the query. This is the only part that will be shown to the user.]
</output>
"""

    # Run a chat-style request against the module-level LLM.
    answer = llm(
        [{"role": "system", "content": system}, {"role": "user", "content": user}],
        maxlength=4096,
    )

    # Extract the <output> section. The closing tag is optional so a response
    # truncated by the generation limit still yields a usable answer.
    extracted = re.search(r"<output>(.*?)(?:</output>|$)", answer, re.DOTALL)
    if extracted:
        return extracted.group(1).strip()

    # No <output> tags at all - fall back to the full response
    return answer
|
|
|
|
|
def rag(question):
    """
    Answers a question with retrieval augmented generation (RAG).

    Relevant passages are retrieved from the module-level embeddings index and
    supplied as context to the CoT + reflection pipeline.

    Args:
        question: question to answer

    Returns:
        generated answer constrained to the retrieved context
    """

    # NOTE: runtime prompt text, kept verbatim.
    prompt = """
    Answer the following question using only the context below. Only include information
    specifically discussed.

    question: {question}
    context: {context}
    """

    system = "You are a friendly assistant. You answer questions from users."

    # Join the text of each search hit into a single newline-delimited context block.
    hits = embeddings.search(question)
    context = "\n".join(hit["text"] for hit in hits)

    return cot(system, prompt.format(question=question, context=context))
|
|
|
|
|
# Load the prebuilt Wikipedia embeddings index from the Hugging Face Hub.
# NOTE(review): downloads the index on first run - requires network access.
embeddings = Embeddings()
embeddings.load(provider="huggingface-hub", container="neuml/txtai-wikipedia")

# AWQ INT4-quantized Llama 3.1 8B Instruct.
# NOTE(review): gpu=True - assumes a CUDA-capable device is available; confirm for deployment.
llm = LLM("hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4", gpu=True)
|
|
|
|
|
def predict(message, history):
    """
    Gradio chat handler. Answers the latest message with RAG.

    Args:
        message: latest user message
        history: prior conversation turns (unused - each query is answered independently)

    Returns:
        generated answer
    """

    return rag(message)
|
|
|
|
|
# Build the Gradio chat UI backed by the RAG + reflection pipeline, then serve it.
chat = gr.ChatInterface(
    predict,
    title="txtai Reflection Chatbot",
    description="A chatbot that uses Chain of Thought (CoT) with self-reflection to answer queries.",
)
chat.launch()
|
|