Minseok Bae committed
Commit b46b972
1 Parent(s): dbcffd4

Added citations

main_backend.py CHANGED
@@ -75,12 +75,12 @@ def run_auto_eval(args):
     else:
         eval_request = manage_requests.EvalRequest(
             model=args.model,
-            status=PENDING_STATUS,
+            status=PENDING_STATUS,
             precision=args.precision
         )
     pp.pprint(eval_request)
     logging.info("Running reproducibility eval")
-
+
     run_eval_suite.run_evaluation(
         eval_request=eval_request,
         local_dir=envs.EVAL_RESULTS_PATH_BACKEND,
@@ -93,14 +93,14 @@ def run_auto_eval(args):
 
 def main():
     parser = argparse.ArgumentParser(description="Run auto evaluation with optional reproducibility feature")
-
+
     # Optional arguments
     parser.add_argument("--reproduce", type=bool, default=False, help="Reproduce the evaluation results")
     parser.add_argument("--model", type=str, default=None, help="Your Model ID")
     parser.add_argument("--precision", type=str, default="float16", help="Precision of your model")
-
+
     args = parser.parse_args()
-
+
     run_auto_eval(args)
 
 
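A side note on the --reproduce option shown above: argparse's type=bool calls bool() on the raw argument string, and any non-empty string (including "False") is truthy, so --reproduce False still comes out as True. The sketch below is an illustration only, not part of this commit; it contrasts that behaviour with the usual action="store_true" pattern.

import argparse

# Illustration of the type=bool pitfall versus a store_true flag (not committed code).
parser = argparse.ArgumentParser(description="Boolean flag behaviour demo")
parser.add_argument("--reproduce-bool", type=bool, default=False)  # bool("False") is True
parser.add_argument("--reproduce", action="store_true")            # present -> True, absent -> False

args = parser.parse_args(["--reproduce-bool", "False", "--reproduce"])
print(args.reproduce_bool)  # True, even though "False" was passed
print(args.reproduce)       # True, because the flag is present
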
src/backend/model_operations.py CHANGED
@@ -8,6 +8,7 @@ import pandas as pd
 import spacy
 # from transformers import AutoModelForCausalLM, AutoTokenizer
 from sentence_transformers import CrossEncoder
+import litellm
 from litellm import completion
 
 import src.backend.util as util
@@ -22,6 +23,8 @@ nlp = spacy.load("en_core_web_sm")
 
 os.environ["HUGGINGFACE_API_KEY"] = envs.TOKEN
 
+litellm.set_verbose=True
+
 
 def load_evaluation_model(model_path):
     """Load the evaluation model from the given path
src/display/about.py CHANGED
@@ -147,4 +147,34 @@ Make sure you have followed the above steps first.
 
 CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
 CITATION_BUTTON_TEXT = r"""
+# This CITATION.cff file was generated with cffinit.
+# Visit https://bit.ly/cffinit to generate yours today!
+
+cff-version: 1.2.0
+title: Vectara Hallucination Leaderboard
+message: >-
+  If you use this dataset, please cite it using the metadata
+  from this file.
+type: dataset
+authors:
+  - email: [email protected]
+    given-names: Simon
+    family-names: Hughes
+  - given-names: Minseok
+    family-names: Bae
+
+repository-code: 'https://github.com/vectara/hallucination-leaderboard'
+url: >-
+  https://github.com/vectara/hallucination-leaderboard/blob/main/README.md
+abstract: >-
+  A leaderboard comparing LLM performance at maintaining
+  factual consistency when summarizing a set of facts.
+keywords:
+  - nlp
+  - llm
+  - hallucination
+  - nli
+  - machine learning
+license: Apache-2.0
+date-released: '2023-11-01'
 """