mrm8488 committed on
Commit
e706e8c
1 Parent(s): 454e13a

Feat: add task synonyms

Browse files
Files changed (1) hide show
  1. app.py +28 -5
app.py CHANGED
@@ -1,5 +1,19 @@
1
  import gradio as gr
2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  # LLM performance data with scores
4
  performance_data = {
5
  "Undergraduate level knowledge": [("Claude 3 Opus", 86.8), ("GPT-4", 86.4), ("Gemini 1.0 Ultra", 83.7)],
@@ -15,9 +29,18 @@ performance_data = {
15
  }
16
 
17
  def recommend_llm(task):
18
- recommendations = performance_data.get(task, [])
19
- if not recommendations:
 
 
 
 
 
 
 
20
  return "No data available"
 
 
21
  recommendations_sorted = sorted(recommendations, key=lambda x: x[1], reverse=True)
22
  result = f"For {task}, the recommended LLMs are:\n"
23
  for i, (model, score) in enumerate(recommendations_sorted):
@@ -27,11 +50,11 @@ def recommend_llm(task):
27
  # Gradio interface
28
  interface = gr.Interface(
29
  fn=recommend_llm,
30
- inputs=gr.Dropdown(list(performance_data.keys()), label="Select Task"),
31
  outputs=gr.Textbox(label="LLM Recommendations"),
32
  title="LLM Recommendation App",
33
- description="Select a task to get recommendations for the best LLMs based on performance data."
34
  )
35
 
36
  # Launch the app
37
- interface.launch()
 
1
  import gradio as gr
2
 
3
+ # Synonyms for each task category
4
# Maps each canonical task category to the phrases a user may type for it.
# Each key is the category name that is then looked up in ``performance_data``;
# each value lists the accepted spellings (the category name itself plus the
# benchmark names associated with it). ``recommend_llm`` lower-cases both the
# user input and these entries and requires the whole string to match, so
# entries may be written in their natural casing, but every accepted phrase
# must be listed explicitly.
task_synonyms = {
    "Undergraduate level knowledge": ["undergraduate level knowledge", "MMLU"],
    # The benchmark is spelled "GPQA" (previously misspelled "GPOA", which
    # made the real benchmark name unmatchable).
    "Graduate level reasoning": ["graduate level reasoning", "GPQA", "Diamond"],
    "Grade school math": ["grade school math", "GSM8K"],
    "Math problem-solving": ["math problem-solving", "MATH"],
    "Multilingual math": ["multilingual math", "MGSM"],
    "Code": ["code", "coding", "programming", "HumanEval"],
    "Reasoning over text": ["reasoning over text", "DROP", "F1 score"],
    "Mixed evaluations": ["mixed evaluations", "BIG-Bench-Hard"],
    "Knowledge Q&A": ["knowledge Q&A", "ARC-Challenge"],
    "Common Knowledge": ["common knowledge", "HellaSwag"],
}
16
+
17
  # LLM performance data with scores
18
  performance_data = {
19
  "Undergraduate level knowledge": [("Claude 3 Opus", 86.8), ("GPT-4", 86.4), ("Gemini 1.0 Ultra", 83.7)],
 
29
  }
30
 
31
  def recommend_llm(task):
32
+ # Normalize the input task to match against synonyms
33
+ task_lower = task.lower()
34
+ main_category = None
35
+ for key, synonyms in task_synonyms.items():
36
+ if task_lower in map(str.lower, synonyms):
37
+ main_category = key
38
+ break
39
+
40
+ if not main_category:
41
  return "No data available"
42
+
43
+ recommendations = performance_data.get(main_category, [])
44
  recommendations_sorted = sorted(recommendations, key=lambda x: x[1], reverse=True)
45
  result = f"For {task}, the recommended LLMs are:\n"
46
  for i, (model, score) in enumerate(recommendations_sorted):
 
50
  # Gradio interface
51
  interface = gr.Interface(
52
  fn=recommend_llm,
53
+ inputs=gr.Textbox(label="Enter Task"),
54
  outputs=gr.Textbox(label="LLM Recommendations"),
55
  title="LLM Recommendation App",
56
+ description="Enter a task to get recommendations for the best LLMs based on performance data. For example, you can enter 'coding', 'undergraduate level knowledge', etc."
57
  )
58
 
59
  # Launch the app
60
+ interface.launch()