Minseok Bae committed
Commit b46b972 • 1 parent: dbcffd4

Added citations

Files changed:
- main_backend.py (+5 -5)
- src/backend/model_operations.py (+3 -0)
- src/display/about.py (+30 -0)
main_backend.py
CHANGED
@@ -75,12 +75,12 @@ def run_auto_eval(args):
     else:
         eval_request = manage_requests.EvalRequest(
             model=args.model,
-            status=PENDING_STATUS,
+            status=PENDING_STATUS,
             precision=args.precision
         )
         pp.pprint(eval_request)
         logging.info("Running reproducibility eval")
-
+
         run_eval_suite.run_evaluation(
             eval_request=eval_request,
             local_dir=envs.EVAL_RESULTS_PATH_BACKEND,
@@ -93,14 +93,14 @@ def run_auto_eval(args):
 
 def main():
     parser = argparse.ArgumentParser(description="Run auto evaluation with optional reproducibility feature")
-
+
     # Optional arguments
     parser.add_argument("--reproduce", type=bool, default=False, help="Reproduce the evaluation results")
     parser.add_argument("--model", type=str, default=None, help="Your Model ID")
     parser.add_argument("--precision", type=str, default="float16", help="Precision of your model")
-
+
     args = parser.parse_args()
-
+
     run_auto_eval(args)

(Each -/+ pair above changes only whitespace; the visible text of those lines is unchanged.)
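An aside on the --reproduce option visible in this hunk: argparse's type=bool is a well-known Python pitfall, because bool() applied to any non-empty string (including "False") returns True, so passing --reproduce False still enables the flag. A minimal sketch of the more conventional store_true form; this is an illustration, not part of the commit:

    import argparse

    parser = argparse.ArgumentParser(
        description="Run auto evaluation with optional reproducibility feature"
    )
    # action="store_true" makes the flag False unless it is passed,
    # avoiding the bool("False") == True trap of type=bool.
    parser.add_argument("--reproduce", action="store_true",
                        help="Reproduce the evaluation results")

    args = parser.parse_args(["--reproduce"])
    print(args.reproduce)  # True; parser.parse_args([]) would give False

With the code as committed, "python main_backend.py --reproduce False" sets args.reproduce to True, since "False" is a non-empty string.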
src/backend/model_operations.py
CHANGED
@@ -8,6 +8,7 @@ import pandas as pd
 import spacy
 # from transformers import AutoModelForCausalLM, AutoTokenizer
 from sentence_transformers import CrossEncoder
+import litellm
 from litellm import completion
 
 import src.backend.util as util
@@ -22,6 +23,8 @@ nlp = spacy.load("en_core_web_sm")
 
 os.environ["HUGGINGFACE_API_KEY"] = envs.TOKEN
 
+litellm.set_verbose=True
+
 
 def load_evaluation_model(model_path):
     """Load the evaluation model from the given path
src/display/about.py
CHANGED
@@ -147,4 +147,34 @@ Make sure you have followed the above steps first.
 
 CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
 CITATION_BUTTON_TEXT = r"""
+# This CITATION.cff file was generated with cffinit.
+# Visit https://bit.ly/cffinit to generate yours today!
+
+cff-version: 1.2.0
+title: Vectara Hallucination Leaderboard
+message: >-
+  If you use this dataset, please cite it using the metadata
+  from this file.
+type: dataset
+authors:
+  - email: [email protected]
+    given-names: Simon
+    family-names: Hughes
+  - given-names: Minseok
+    family-names: Bae
+    email: [email protected]
+repository-code: 'https://github.com/vectara/hallucination-leaderboard'
+url: >-
+  https://github.com/vectara/hallucination-leaderboard/blob/main/README.md
+abstract: >-
+  A leaderboard comparing LLM performance at maintaining
+  factual consistency when summarizing a set of facts.
+keywords:
+  - nlp
+  - llm
+  - hallucination
+  - nli
+  - machine learning
+license: Apache-2.0
+date-released: '2023-11-01'
 """
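Because CITATION_BUTTON_TEXT now holds a complete CITATION.cff document, it can be sanity-checked with a YAML parser before shipping. A minimal sketch using PyYAML, assuming the Space's repository root is on sys.path; the checked keys mirror this file rather than the full CFF 1.2.0 schema (the cffconvert package does real schema validation):

    import yaml  # PyYAML

    from src.display.about import CITATION_BUTTON_TEXT

    cff = yaml.safe_load(CITATION_BUTTON_TEXT)

    # Spot-check the fields added in this commit.
    for key in ("cff-version", "title", "authors", "license", "date-released"):
        assert key in cff, f"missing CFF key: {key}"

    print(cff["title"])  # Vectara Hallucination Leaderboard
    print([author["family-names"] for author in cff["authors"]])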