magilogi committed on
Commit
cbf54c8
•
1 Parent(s): aa9beda
data/api-results/__pycache__/api_results.cpython-311.pyc CHANGED
Binary files a/data/api-results/__pycache__/api_results.cpython-311.pyc and b/data/api-results/__pycache__/api_results.cpython-311.pyc differ
 
data/api-results/api_results.py CHANGED
@@ -29,4 +29,25 @@ claude_opus = {
29
  'medqa_g2b': 0.8333333333333334,
30
  'medmcqa_og': 0.8649425287356322,
31
  'medmcqa_g2b': 0.7988505747126436
32
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  'medqa_g2b': 0.8333333333333334,
30
  'medmcqa_og': 0.8649425287356322,
31
  'medmcqa_g2b': 0.7988505747126436
32
+ }
33
+
34
+ gemini_15_pro = {
35
+ 'medqa_og': 0.8862433862433863,
36
+ 'medqa_g2b': 0.873015873015873,
37
+ 'medmcqa_og': 0.8649425287356322,
38
+ 'medmcqa_g2b': 0.8247126436781609
39
+ }
40
+
41
+ gemini_pro_1 = {
42
+ 'medqa_og': 0.7063492063492064,
43
+ 'medqa_g2b': 0.7301587301587301,
44
+ 'medmcqa_og': 0.6810344827586207,
45
+ 'medmcqa_g2b': 0.7385057471264368
46
+ }
47
+
48
+ gemini_15_flash = {
49
+ 'medqa_og': 0.9708994708994709,
50
+ 'medqa_g2b': 0.9603174603174603,
51
+ 'medmcqa_og': 0.9741379310344828,
52
+ 'medmcqa_g2b': 0.9482758620689655
53
+ }
data/csv/models_data.csv CHANGED
@@ -22,3 +22,6 @@ T,Model,b4bqa,b4b,medmcqa_g2b,medmcqa_orig_filtered,medmcqa_diff,medqa_4options_
22
  💬,"<a target=""_blank"" href=""https://platform.openai.com/docs/models"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">GPT-4o</a>",96.48,,86.49,90.52,-4.03,88.36,90.21,-1.85
23
  💬,"<a target=""_blank"" href=""https://platform.openai.com/docs/models"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">GPT-3.5 Turbo</a>",91.74,,97.7,98.28,-0.58,96.03,96.3,-0.27
24
  💬,"<a target=""_blank"" href=""https://www.anthropic.com/api"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">Claude Opus</a>",92.19,,79.89,86.49,-6.6,83.33,85.71,-2.38
 
 
 
 
22
  💬,"<a target=""_blank"" href=""https://platform.openai.com/docs/models"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">GPT-4o</a>",96.48,,86.49,90.52,-4.03,88.36,90.21,-1.85
23
  💬,"<a target=""_blank"" href=""https://platform.openai.com/docs/models"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">GPT-3.5 Turbo</a>",91.74,,97.7,98.28,-0.58,96.03,96.3,-0.27
24
  💬,"<a target=""_blank"" href=""https://www.anthropic.com/api"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">Claude Opus</a>",92.19,,79.89,86.49,-6.6,83.33,85.71,-2.38
25
+ 💬,"<a target=""_blank"" href=""https://ai.google.dev/"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">Gemini 1.5 Pro</a>",0.0,,82.47,86.49,-4.02,87.3,88.62,-1.32
26
+ 💬,"<a target=""_blank"" href=""https://ai.google.dev/"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">Gemini Pro 1</a>",0.0,,73.85,68.1,5.75,73.02,70.63,2.39
27
+ 💬,"<a target=""_blank"" href=""https://ai.google.dev/"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">Gemini 1.5 Flash</a>",0.0,,94.83,97.41,-2.58,96.03,97.09,-1.06
src/__pycache__/models_info.cpython-311.pyc CHANGED
Binary files a/src/__pycache__/models_info.cpython-311.pyc and b/src/__pycache__/models_info.cpython-311.pyc differ
 
src/json2df.py CHANGED
@@ -7,7 +7,7 @@ import sys
7
  sys.path.append(os.path.abspath('data/api-results'))
8
 
9
  # Now import the API results
10
- from api_results import gpt4, gpt4o, gpt35turbo, claude_opus
11
  from models_info import model_info
12
 
13
  directory = 'data/raw-eval-outputs'
@@ -49,7 +49,10 @@ api_models = {
49
  'GPT-4': gpt4,
50
  'GPT-4o': gpt4o,
51
  'GPT-3.5 Turbo': gpt35turbo,
52
- 'Claude Opus': claude_opus
 
 
 
53
  }
54
 
55
  for model_name, results in api_models.items():
 
7
  sys.path.append(os.path.abspath('data/api-results'))
8
 
9
  # Now import the API results
10
+ from api_results import gpt4, gpt4o, gpt35turbo, claude_opus, gemini_15_pro, gemini_pro_1, gemini_15_flash
11
  from models_info import model_info
12
 
13
  directory = 'data/raw-eval-outputs'
 
49
  'GPT-4': gpt4,
50
  'GPT-4o': gpt4o,
51
  'GPT-3.5 Turbo': gpt35turbo,
52
+ 'Claude Opus': claude_opus,
53
+ 'Gemini 1.5 Pro': gemini_15_pro,
54
+ 'Gemini Pro 1': gemini_pro_1,
55
+ 'Gemini 1.5 Flash': gemini_15_flash
56
  }
57
 
58
  for model_name, results in api_models.items():
src/models_info.py CHANGED
@@ -91,5 +91,17 @@ model_info = {
91
  "Claude Opus": {
92
  "link": "https://www.anthropic.com/api",
93
  "tuning": "💬"
 
 
 
 
 
 
 
 
 
 
 
 
94
  }
95
  }
 
91
  "Claude Opus": {
92
  "link": "https://www.anthropic.com/api",
93
  "tuning": "💬"
94
+ },
95
+ "Gemini 1.5 Pro": {
96
+ "link": "https://ai.google.dev/",
97
+ "tuning": "💬"
98
+ },
99
+ "Gemini Pro 1": {
100
+ "link": "https://ai.google.dev/",
101
+ "tuning": "💬"
102
+ },
103
+ "Gemini 1.5 Flash": {
104
+ "link": "https://ai.google.dev/",
105
+ "tuning": "💬"
106
  }
107
  }