Spaces:
Running
Running
File size: 5,011 Bytes
eb437df 6556082 eb437df b4e2dd9 eb437df cc9cab6 eb437df cc9cab6 eb437df cc9cab6 eb437df cc9cab6 eb437df |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
import unittest
import os
from omegaconf import OmegaConf
from vectara_agentic.agent import Agent
from agent import initialize_agent
from dotenv import load_dotenv
load_dotenv(override=True)
class TestAgentResponses(unittest.TestCase):
    """End-to-end checks of the answers given by the legal-research agent.

    The agent is built with ``initialize_agent`` from configuration read out
    of the environment, so the test needs ``VECTARA_CUSTOMER_ID``,
    ``VECTARA_CORPUS_ID`` and ``VECTARA_API_KEY`` to be set (``QUERY_EXAMPLES``
    is optional).
    """

    def test_responses(self):
        """Ask the agent a fixed series of questions and check every answer.

        Each question runs inside its own ``subTest`` so that one wrong answer
        is reported without hiding the results of the questions after it. The
        questions are still sent in a fixed order on a single agent instance.

        Raises:
            KeyError: if one of the required ``VECTARA_*`` environment
                variables is missing.
        """
        cfg = OmegaConf.create({
            'customer_id': str(os.environ['VECTARA_CUSTOMER_ID']),
            'corpus_id': str(os.environ['VECTARA_CORPUS_ID']),
            'api_key': str(os.environ['VECTARA_API_KEY']),
            'examples': os.environ.get('QUERY_EXAMPLES', None),
        })
        agent = initialize_agent(_cfg=cfg)
        # Nothing below is meaningful without a real agent, so this check is
        # deliberately outside any subTest: a failure here stops the test.
        self.assertIsInstance(agent, Agent)

        def ask(question):
            # Assumes agent.chat() returns a str. Surrounding whitespace is
            # never part of an expected answer, so it is dropped up front.
            return agent.chat(question).strip()

        def check_whole_word(word, question):
            # Whole-word match: a plain substring test for 'no' would also
            # accept answers such as "I don't know" or "not sure".
            with self.subTest(question=question):
                self.assertRegex(ask(question).lower(), r'\b%s\b' % word)

        def check_contains(fragment, question):
            # Case-insensitive substring match, for answers that may carry
            # extra words or inflections (e.g. "Affirmed.").
            with self.subTest(question=question):
                self.assertIn(fragment, ask(question).lower())

        def check_equals(expected, question):
            with self.subTest(question=question):
                self.assertEqual(ask(question), expected)

        def check_one_of(accepted, question):
            with self.subTest(question=question):
                self.assertIn(ask(question), accepted)

        # Test whether cases are real or fake
        check_whole_word(
            'yes',
            'Is the case Brown v. Board of Education, 347 U.S. 483 (1954), a real case? Say "yes" or "no" only.',
        )
        check_whole_word(
            'yes',
            'Is the case Bowers v. Hardwick, 478 U.S. 186 (1986), a real case? Say "yes" or "no" only.',
        )
        check_whole_word(
            'no',
            'Is the case Columbia University v. Rodham, 564 U.S. 911 (2010), a real case? Say "yes" or "no" only.',
        )

        # Test case citation extraction
        check_equals(
            '347 U.S. 483',
            'What is the citation for the case Brown v. Board of Education? Provide ONLY the citation in "<volume>, <reporter>, <page>" format, nothing else.',
        )
        check_equals(
            '478 U.S. 186',
            'What is the citation for the case Bowers v. Hardwick? Provide ONLY the citation in "<volume>, <reporter>, <page>" format, nothing else.',
        )
        check_equals(
            '17 U.S. 316',
            'What is the citation for the case McCulloch v. Maryland? Provide ONLY the citation in "<volume>, <reporter>, <page>" format, nothing else.',
        )

        # Test opinion author identification
        check_equals(
            'Earl Warren',
            'Who wrote the majority opinion in Brown v. Board of Education, 347 U.S. 483 (1954)? Provide the first and the last name of the judge ONLY.',
        )
        check_equals(
            'Byron White',
            'Who wrote the majority opinion in Bowers v. Hardwick, 478 U.S. 186 (1986)? Provide the first and the last name of the judge ONLY.',
        )
        check_equals(
            'John Marshall',
            'Who wrote the majority opinion in McCulloch v. Maryland, 17 U.S. 316 (1819)? Provide the first and the last name of the judge ONLY.',
        )

        # Test opinion text understanding
        check_contains(
            'affirm',
            "Did the court in Plessy v. Ferguson, 163 U.S. 537 (1896) affirm or reverse the lower court's decision? Say 'affirm' or 'reverse' only.",
        )
        check_contains(
            'reverse',
            "Did the court in Bowers v. Hardwick, 478 U.S. 186 (1986) affirm or reverse the lower court's decision? Say 'affirm' or 'reverse' only.",
        )
        check_contains(
            'reverse',
            "Did the court in McCulloch v. Maryland, 17 U.S. 316 (1819) affirm or reverse the lower court's decision? Say 'affirm' or 'reverse' only.",
        )

        # Test court identification
        check_contains(
            'united states court of appeals for the second circuit',
            "Which court decided the case Viacom International Inc. v. YouTube, Inc., 676 F.3d 19 (2012)? Provide the name of the court ONLY, nothing else.",
        )
        check_contains(
            'united states court of appeals for the district of columbia circuit',
            "Which court decided the case Durham v. United States, 214 F.2d 862 (1954)? Provide the name of the court ONLY, nothing else.",
        )
        check_contains(
            'supreme court',
            "Which court decided the case Bowers v. Hardwick (1986)? Provide the name of the court ONLY, nothing else.",
        )

        # Test overruling of case
        # Our agent seems to not find the answer to the Whitney question,
        # which is acceptable (at least it's not hallucinating), so an
        # explicit "I don't know." is accepted alongside the correct year.
        check_one_of(
            ['1969', 'I don\'t know.'],
            "What year was Whitney v. California, 274 U.S. 357, overruled? Provide the year only.",
        )
        check_equals(
            '2010',
            "What year was Austin v. Michigan Chamber of Commerce, 494 U.S. 652, overruled? Provide the year only.",
        )

        # Compare two rulings
        check_contains(
            'disagree',
            'Do the cases Brown v. Board of Education, 347 U.S. 483 (1954) and Plessy v. Ferguson, 163 U.S. 537 (1896) agree or disagree with each other? Say "agree" or "disagree" only.',
        )
        # NOTE: disabled case. The expected answer is 'agree', but our agent
        # thinks that these rulings disagree, so the check is not run:
        #   'Do the cases Youngstown Sheet & Tube Co. v. Sawyer, 343 U.S. 579 (1952) and Medellin v. Texas, 552 U.S. 491 (2008) agree or disagree with each other? Say "agree" or "disagree" only.'
        check_contains(
            'disagree',
            'Do the cases Whitney v. California, 274 U.S. 357 (1927) and Brandenburg v. Ohio, 395 U.S. 444 (1969) agree or disagree with each other? Say "agree" or "disagree" only.',
        )
if __name__ == "__main__":
    # Run the tests defined in this module when the file is executed directly.
    unittest.main()