# Questions should be open-ended but demand concrete answers.
questions = [
    "Provide an explanation for Japan's Lost Decades.",
    "What is a cause of World War 1?",
]


def create_gen_prompt(question: str, previous_answers: list) -> str:
    """Build the prompt asking for one brief answer to *question*.

    Args:
        question: The question text, inserted verbatim into the prompt.
        previous_answers: Answers already given. When non-empty they are
            numbered from 1, wrapped in ``<previous_answers>`` tags, and the
            prompt asks for an answer that has not been given before. An
            empty list (or ``None``) yields the base prompt only.

    Returns:
        The prompt string.
    """
    prompt = (
        "Answer the following question:.\n"
        f"{question}\n"
        "Your response should be a single brief sentence.\n"
    )

    if previous_answers:
        previous_answers_str = "\n".join(
            f"{number}. {answer}"
            for number, answer in enumerate(previous_answers, start=1)
        )
        # The instruction refers to XML tags, so the list must actually be
        # wrapped in the tags it names.
        prompt += (
            "IMPORTANT: Provide an answer you *HAVE NOT* given previously.\n"
            "Your previous answers are inside of "
            "<previous_answers></previous_answers> XML tags.\n"
            f"<previous_answers>\n{previous_answers_str}\n</previous_answers>"
        )

    return prompt


def create_judge_prompt(question: str, answer: str) -> str:
    """Build the prompt asking a judge to score an answer's coherence.

    The prompt asks for step-by-step reasoning followed by an integer score
    from 1 to 10 enclosed in ``<coherence_score>`` tags.

    Args:
        question: The question that was asked.
        answer: The answer to be evaluated.

    Returns:
        The prompt string.
    """
    # NOTE(review): the original text asked for "XML tags" without naming any;
    # `coherence_score` is chosen here -- confirm it matches the tag the
    # caller extracts from the judge's response.
    prompt = f"""
    Your task is to evaluate the coherence and plausibility of an answer to a given question. This involves assessing whether the answer makes sense and isn't nonsensical or implausible.

    Question: {question}
    Answer: {answer}

    Evaluation process:
    1. Understand the question: Analyze what the question is asking.
    2. Assess the answer: Determine if the answer is coherent and plausible.
    3. Check for nonsensical elements: Identify any aspects that are completely unrelated or absurd.

    Please think through each step carefully and show your reasoning:

    1. Question analysis:
    [Your brief analysis of the question here]

    2. Answer assessment:
    [Evaluate if the answer is coherent and plausible]

    3. Nonsensical check:
    [Identify any completely unrelated or absurd elements]

    Based on your analysis, provide a final Coherence and Plausibility Score on a scale of 1 - 10, where:
    1-3: Incoherent, implausible, or nonsensical
    4-6: Partially coherent and plausible, but with some issues
    7-8: Mostly coherent and plausible with minor issues
    9-10: Highly coherent and plausible

    Ensure that nonsensical or completely implausible answers receive very low scores (1-3).

    IMPORTANT: After your reasoning, you must provide your final Coherence and Plausibility Score as a single integer between 1 and 10, enclosed in <coherence_score></coherence_score> XML tags. For example:
    <coherence_score>7</coherence_score>

    Your response must end with this score in the specified format.
    """
    return prompt