from query_data import query_rag
from langchain_community.llms.ollama import Ollama

EVAL_PROMPT = """
Expected Response: {expected_response}
Actual Response: {actual_response}
---
(Answer with 'true' or 'false') Does the actual response match the expected response?
"""
def test_monopoly_rules():
    assert query_and_validate(
        question="How much total money does a player start with in Monopoly? (Answer with the number only)",
        expected_response="$1500",
    )


def test_ticket_to_ride_rules():
    assert query_and_validate(
        question="How many points does the longest continuous train get in Ticket to Ride? (Answer with the number only)",
        expected_response="10 points",
    )
def query_and_validate(question: str, expected_response: str):
    # Run the question through the RAG pipeline, then ask a local LLM to judge
    # whether the actual answer matches the expected one.
    response_text = query_rag(question)
    prompt = EVAL_PROMPT.format(
        expected_response=expected_response, actual_response=response_text
    )

    model = Ollama(model="mistral")
    evaluation_results_str = model.invoke(prompt)
    evaluation_results_str_cleaned = evaluation_results_str.strip().lower()

    print(prompt)

    if "true" in evaluation_results_str_cleaned:
        # Print response in green if it is correct.
        print("\033[92m" + f"Response: {evaluation_results_str_cleaned}" + "\033[0m")
        return True
    elif "false" in evaluation_results_str_cleaned:
        # Print response in red if it is incorrect.
        print("\033[91m" + f"Response: {evaluation_results_str_cleaned}" + "\033[0m")
        return False
    else:
        raise ValueError(
            "Invalid evaluation result. Cannot determine if 'true' or 'false'."
        )
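A negative test can also be useful to confirm that the LLM-based evaluator rejects mismatches rather than answering "true" for everything. The sketch below is an assumption, not part of the original file: the test name, the deliberately wrong "$9999" value, and the test_rag.py filename in the comment are all hypothetical.

# Minimal sketch of a negative test, assuming query_and_validate behaves as above:
# the evaluator should return False for a deliberately wrong expected answer,
# so the test asserts the negated result.
def test_monopoly_rules_wrong_answer():
    assert not query_and_validate(
        question="How much total money does a player start with in Monopoly? (Answer with the number only)",
        expected_response="$9999",  # intentionally incorrect (hypothetical value)
    )

# Run with output capture disabled so the printed prompts and colored results
# are visible, e.g.: pytest -s test_rag.py  (file name assumed)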