piotr-szleg-bards-ai commited on
Commit
b7b9e52
1 Parent(s): fc8c467

2024-03-05 12:06:07 Publish script update

Browse files
app.py CHANGED
@@ -9,47 +9,7 @@ import plotly.express as px
9
  from pandas.api.types import is_numeric_dtype
10
 
11
  from pipeline.config import LLMBoardConfig, QueriesConfig
12
-
13
- README = """
14
- Projects compares different large language models and their providers for real time applications and mass data processing.
15
- While other benchmarks compare LLMs on different human intelligence tasks this benchmark focus on features related to business and engineering aspects such as response times, pricing and data streaming capabilities.
16
-
17
- To preform evaluation we chose a task of newspaper articles summarization from [GEM/xlsum](https://huggingface.co/datasets/GEM/xlsum) dataset as it represents a very standard type of task where model has to understand unstructured natural language text, process it and output text in a specified format.
18
- For this version we chose English and Japanese languages, with Japanese representing languages using logographic alphabets. This enable us also validate the effectiveness of the LLM for different language groups.
19
-
20
- Each of the models was asked to summarize the text using the following prompt:
21
-
22
- ```
23
- {}
24
- ```
25
-
26
- Where {{language}} stands for original language of the text as we wanted to avoid the model translating the text to English during summarization.
27
-
28
- LLM was asked to return the output in three formats: markdown, json and function call. Note that currently function calls are only supported by Open AI API.
29
- To do that we added following text to the query:
30
-
31
- {}
32
-
33
- All of the call were made from the same machine with the same internet connection with usage of the LiteLLM library which may adds some time overhead compared to pure curl calls. Call were made from Poland, UTC +1.
34
-
35
- Please take a look at the following project and let us know if you have any questions or suggestions.
36
- """
37
-
38
- time_periods_explanation_df = pd.DataFrame(
39
- {
40
- "time_of_day": [
41
- "early morning",
42
- "morning",
43
- "afternoon",
44
- "late afternoon",
45
- "evening",
46
- "late evening",
47
- "midnight",
48
- "night",
49
- ],
50
- "hour_range": ["6-8", "9-11", "12-14", "15-17", "18-20", "21-23", "0-2", "3-5"],
51
- }
52
- )
53
 
54
  queries_config = QueriesConfig()
55
 
@@ -62,7 +22,9 @@ time_of_day_comparison_df = pd.read_csv("data/time_of_day_comparison.csv")
62
  general_plots = pd.read_csv("data/general_plots.csv")
63
  model_costs_df = pd.read_csv("data/model_costs.csv")
64
  time_of_day_plots = pd.read_csv("data/time_of_day_plots.csv")
 
65
  output_plots = pd.read_csv("data/output_plots.csv")
 
66
 
67
  searched_query = ""
68
  collapse_languages = False
@@ -73,7 +35,7 @@ def filter_dataframes(input: str):
73
  global searched_query
74
  input = input.lower()
75
  searched_query = input
76
- return dataframes()
77
 
78
 
79
  def collapse_languages_toggle():
@@ -84,7 +46,7 @@ def collapse_languages_toggle():
84
  else:
85
  collapse_languages = True
86
  button_text = "Un-collapse languages"
87
- return dataframes()[0], button_text
88
 
89
 
90
  def collapse_output_method_toggle():
@@ -95,9 +57,9 @@ def collapse_output_method_toggle():
95
  else:
96
  collapse_output_method = True
97
  button_text = "Un-collapse output method"
98
- return dataframes()[0], button_text
99
 
100
- def filter_dataframe(df, searched_model_names):
101
  if not searched_model_names:
102
  return df
103
  filter_series = df.model == "" # False values
@@ -105,7 +67,7 @@ def filter_dataframe(df, searched_model_names):
105
  filter_series = filter_series | df.model.str.lower().str.contains(n)
106
  return df[filter_series]
107
 
108
- def dataframes():
109
  global collapse_languages, collapse_output_method, searched_query, summary_df, time_of_day_comparison_df, model_costs_df
110
 
111
  summary_df_columns = summary_df.columns.to_list()
@@ -124,7 +86,7 @@ def dataframes():
124
  searched_model_names = [n for n in searched_model_names if n]
125
 
126
  def for_dataframe(df):
127
- return dataframe_style(filter_dataframe(df, searched_model_names))
128
 
129
  return (
130
  for_dataframe(summary_df_processed),
@@ -155,22 +117,26 @@ def dataframe_style(df: pd.DataFrame):
155
  df = df.style.format(column_formats, na_rep="")
156
  return df
157
 
158
-
159
  def snake_case_to_title(text):
160
  # Convert snake_case to title-case
161
  words = re.split(r"_", text)
162
  title_words = [word.capitalize() for word in words]
163
  return " ".join(title_words)
164
 
165
-
166
- filter_textbox = gr.Textbox(label="Model name parts *", scale=2)
167
- filter_button = gr.Button("Filter", scale=1)
168
- collapse_languages_button = gr.Button("Collapse languages")
169
- collapse_output_method_button = gr.Button("Collapse output method")
170
- last_textbox = 0
171
  plots = []
172
- single_model_plots = []
173
 
 
 
 
 
 
 
 
 
 
 
 
 
174
 
175
  def filter_plots(searched_query: str):
176
  searched_model_names = searched_query.split("|")
@@ -183,8 +149,12 @@ def filter_plots(searched_query: str):
183
  if "df" in row and pd.notna(row["df"]):
184
  buffer = io.StringIO(row["df"])
185
  df = pd.read_csv(buffer)
186
- df = filter_dataframe(df, searched_model_names)
187
- plot = px.bar(df, **json.loads(row["arguments"]))
 
 
 
 
188
  plot.update_layout(autosize=True)
189
  elif "for model" in row["header"] and searched_model_names:
190
  plot_model = row["header"].split("for model")[1].lower()
@@ -195,22 +165,16 @@ def filter_plots(searched_query: str):
195
 
196
  return results
197
 
198
-
199
- def display_plot(plot_df_row):
200
- row = dict(plot_df_row)
201
- plot = plotly.io.from_json(row["plot_json"])
202
- plot.update_layout(autosize=True)
203
- plots.append((gr.Plot(plot, label=row["header"], scale=1), plot, row))
204
- if "description" in row and pd.notna(row["description"]):
205
- gr.Markdown(str(row["description"]))
206
-
207
-
208
- with gr.Blocks(theme=gr.themes.Default(text_size="lg")) as demo:
209
  gr.HTML("<h1>Performance LLM Board</h1>")
210
 
211
  with gr.Row():
212
- filter_textbox.render()
213
- filter_button.render()
 
 
 
 
214
  gr.Markdown(
215
  '&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\* You can use `|` operator to display multiple models at once, for example "gpt|mistral|zephyr"'
216
  )
@@ -223,9 +187,9 @@ with gr.Blocks(theme=gr.themes.Default(text_size="lg")) as demo:
223
  )
224
  with gr.Tab("Performance by time of the day"):
225
  # display only first plot for all models
226
- time_of_day_plots[0:1].apply(display_plot, axis=1)
227
  time_periods_explanation_ui = gr.DataFrame(
228
- dataframe_style(time_periods_explanation_df), label="Times of day ranges"
229
  )
230
  time_of_day_comparison_ui = gr.DataFrame(dataframe_style(time_of_day_comparison_df), label="Time of day")
231
  gr.Markdown(
@@ -240,11 +204,11 @@ Measurements were made during a normal work week.
240
  """
241
  )
242
  # display rest of the plots
243
- time_of_day_plots[1:].apply(display_plot, axis=1)
244
  with gr.Tab("Output characteristics"):
245
  with gr.Row():
246
- collapse_languages_button.render()
247
- collapse_output_method_button.render()
248
  summary_ui = gr.DataFrame(dataframe_style(summary_df), label="Output characteristics")
249
  gr.Markdown(
250
  """\
@@ -256,7 +220,7 @@ To count words we split the output string by whitespace `\w` regex character.
256
 
257
  Chunk sizes are measured in the characters count."""
258
  )
259
- output_plots.apply(display_plot, axis=1)
260
  with gr.Tab("Costs comparison"):
261
  models_costs_ui = gr.DataFrame(dataframe_style(model_costs_df), label="Costs comparison")
262
  gr.Markdown(
@@ -269,9 +233,11 @@ for models hosted this way we calculated "Cost Per Token" column using data coll
269
  Note that pause and resume time cost was not included in the "Cost Per Token" column calculation.
270
  """
271
  )
272
- general_plots[general_plots.plot_name == "execution_costs"].apply(display_plot, axis=1)
 
 
273
  with gr.Tab("Context length and parameters count"):
274
- general_plots[general_plots.plot_name != "execution_costs"].apply(display_plot, axis=1)
275
  gr.Markdown(
276
  """
277
  LLM models context length and parameters count are based on release blogs and documentation of their respective developers.
@@ -281,6 +247,40 @@ A lot of models had to be omitted due to their developers not disclosing their p
281
  Mainly OpenAI's GPT models and Google's Palm 2.
282
  """
283
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
284
  filter_button.click(
285
  fn=filter_dataframes,
286
  inputs=filter_textbox,
 
9
  from pandas.api.types import is_numeric_dtype
10
 
11
  from pipeline.config import LLMBoardConfig, QueriesConfig
12
+ from app_constants import README, JS, TIME_PERIODS_EXPLANATION_DF
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  queries_config = QueriesConfig()
15
 
 
22
  general_plots = pd.read_csv("data/general_plots.csv")
23
  model_costs_df = pd.read_csv("data/model_costs.csv")
24
  time_of_day_plots = pd.read_csv("data/time_of_day_plots.csv")
25
+ summary_metrics_plots = pd.read_csv("data/summary_metrics_plots.csv")
26
  output_plots = pd.read_csv("data/output_plots.csv")
27
+ combined_plots = pd.read_csv("data/combined_plots.csv")
28
 
29
  searched_query = ""
30
  collapse_languages = False
 
35
  global searched_query
36
  input = input.lower()
37
  searched_query = input
38
+ return get_updated_dataframes()
39
 
40
 
41
  def collapse_languages_toggle():
 
46
  else:
47
  collapse_languages = True
48
  button_text = "Un-collapse languages"
49
+ return get_updated_dataframes()[0], button_text
50
 
51
 
52
  def collapse_output_method_toggle():
 
57
  else:
58
  collapse_output_method = True
59
  button_text = "Un-collapse output method"
60
+ return get_updated_dataframes()[0], button_text
61
 
62
+ def filter_dataframe_by_models(df, searched_model_names):
63
  if not searched_model_names:
64
  return df
65
  filter_series = df.model == "" # False values
 
67
  filter_series = filter_series | df.model.str.lower().str.contains(n)
68
  return df[filter_series]
69
 
70
+ def get_updated_dataframes():
71
  global collapse_languages, collapse_output_method, searched_query, summary_df, time_of_day_comparison_df, model_costs_df
72
 
73
  summary_df_columns = summary_df.columns.to_list()
 
86
  searched_model_names = [n for n in searched_model_names if n]
87
 
88
  def for_dataframe(df):
89
+ return dataframe_style(filter_dataframe_by_models(df, searched_model_names))
90
 
91
  return (
92
  for_dataframe(summary_df_processed),
 
117
  df = df.style.format(column_formats, na_rep="")
118
  return df
119
 
 
120
  def snake_case_to_title(text):
121
  # Convert snake_case to title-case
122
  words = re.split(r"_", text)
123
  title_words = [word.capitalize() for word in words]
124
  return " ".join(title_words)
125
 
 
 
 
 
 
 
126
  plots = []
 
127
 
128
+ def display_plot(plot_df_row):
129
+ row = dict(plot_df_row)
130
+ plot = plotly.io.from_json(row["plot_json"])
131
+ plot.update_layout(autosize=True)
132
+ return (gr.Plot(plot, label=row["header"], scale=1), plot)
133
+
134
+ def display_filtered_plot(plot_df_row):
135
+ row = dict(plot_df_row)
136
+ plot_element, plot = display_plot(plot_df_row)
137
+ plots.append((plot_element, plot, row))
138
+ if "description" in row and pd.notna(row["description"]):
139
+ gr.Markdown(str(row["description"]))
140
 
141
  def filter_plots(searched_query: str):
142
  searched_model_names = searched_query.split("|")
 
149
  if "df" in row and pd.notna(row["df"]):
150
  buffer = io.StringIO(row["df"])
151
  df = pd.read_csv(buffer)
152
+ df = filter_dataframe_by_models(df, searched_model_names)
153
+ plot_constructor = px.bar
154
+ if "plot_type" in row and pd.notna(row["plot_type"]) and row["plot_type"]:
155
+ if row["plot_type"] == "scatter":
156
+ plot_constructor = px.scatter
157
+ plot = plot_constructor(df, **json.loads(row["arguments"]))
158
  plot.update_layout(autosize=True)
159
  elif "for model" in row["header"] and searched_model_names:
160
  plot_model = row["header"].split("for model")[1].lower()
 
165
 
166
  return results
167
 
168
+ with gr.Blocks(theme=gr.themes.Default(text_size="lg"), js=JS) as demo:
 
 
 
 
 
 
 
 
 
 
169
  gr.HTML("<h1>Performance LLM Board</h1>")
170
 
171
  with gr.Row():
172
+ filter_textbox = gr.Textbox(label="Model name parts *", scale=2, elem_id="filter-textbox")
173
+ filter_button = gr.Button("Filter", scale=1, elem_id="filter-button")
174
+ with gr.Column(scale=1):
175
+ open_ai_button = gr.Button("Compare Open AI models", elem_id="open-ai-button", scale=1)
176
+ google_button = gr.Button("Compare Google Models", elem_id="google-button", scale=1)
177
+ # gr.Button("Open Models", size="sm")
178
  gr.Markdown(
179
  '&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\* You can use `|` operator to display multiple models at once, for example "gpt|mistral|zephyr"'
180
  )
 
187
  )
188
  with gr.Tab("Performance by time of the day"):
189
  # display only first plot for all models
190
+ time_of_day_plots[0:1].apply(display_filtered_plot, axis=1)
191
  time_periods_explanation_ui = gr.DataFrame(
192
+ dataframe_style(TIME_PERIODS_EXPLANATION_DF), label="Times of day ranges"
193
  )
194
  time_of_day_comparison_ui = gr.DataFrame(dataframe_style(time_of_day_comparison_df), label="Time of day")
195
  gr.Markdown(
 
204
  """
205
  )
206
  # display rest of the plots
207
+ time_of_day_plots[1:].apply(display_filtered_plot, axis=1)
208
  with gr.Tab("Output characteristics"):
209
  with gr.Row():
210
+ collapse_languages_button = gr.Button("Collapse languages")
211
+ collapse_output_method_button = gr.Button("Collapse output method")
212
  summary_ui = gr.DataFrame(dataframe_style(summary_df), label="Output characteristics")
213
  gr.Markdown(
214
  """\
 
220
 
221
  Chunk sizes are measured in the characters count."""
222
  )
223
+ output_plots.apply(display_filtered_plot, axis=1)
224
  with gr.Tab("Costs comparison"):
225
  models_costs_ui = gr.DataFrame(dataframe_style(model_costs_df), label="Costs comparison")
226
  gr.Markdown(
 
233
  Note that pause and resume time cost was not included in the "Cost Per Token" column calculation.
234
  """
235
  )
236
+ general_plots[general_plots.plot_name == "execution_costs"].apply(display_filtered_plot, axis=1)
237
+ with gr.Tab("Summary metrics"):
238
+ summary_metrics_plots.apply(display_filtered_plot, axis=1)
239
  with gr.Tab("Context length and parameters count"):
240
+ general_plots[general_plots.plot_name != "execution_costs"].apply(display_filtered_plot, axis=1)
241
  gr.Markdown(
242
  """
243
  LLM models context length and parameters count are based on release blogs and documentation of their respective developers.
 
247
  Mainly OpenAI's GPT models and Google's Palm 2.
248
  """
249
  )
250
+ with gr.Tab("Combined plots"):
251
+ with gr.Row():
252
+ choices = combined_plots.header
253
+ choices = choices[choices.str.contains("for model")]
254
+ choices = choices.str.split("for model").apply(lambda x: x[1])
255
+ def handle_dropdown(dropdown, plot_element):
256
+ def dropdown_change_handler(value):
257
+ for _, row in combined_plots.iterrows():
258
+ if value in row["header"]:
259
+ return display_plot(row)[0]
260
+ dropdown.change(
261
+ fn=dropdown_change_handler,
262
+ inputs=[dropdown],
263
+ outputs=[plot_element],
264
+ api_name="dropdown_change_handler",
265
+ )
266
+ with gr.Column():
267
+ dropdown = gr.Dropdown(choices.tolist(), label="First model for comparison", value=choices.iloc[0])
268
+ plot_element, plot = display_plot(combined_plots.iloc[0])
269
+ handle_dropdown(dropdown, plot_element)
270
+ with gr.Column():
271
+ dropdown = gr.Dropdown(choices.tolist(), label="Second model for comparison", value=choices.iloc[1])
272
+ plot_element, plot = display_plot(combined_plots.iloc[1])
273
+ handle_dropdown(dropdown, plot_element)
274
+ gr.Markdown("""
275
+ Radial plots are used to compare the most important aspects of each model researched on this board using single images.
276
+
277
+ All values are normalized and scaled into 0.25 to 1 range, 0 is left for unknown values.
278
+
279
+ To compare the parameters more thoroughly use the filtering box on top of this page and inspect individual tabs.
280
+
281
+ In addition to side by side comparison all of the radial plots are displayed below.
282
+ """)
283
+ combined_plots.apply(display_filtered_plot, axis=1)
284
  filter_button.click(
285
  fn=filter_dataframes,
286
  inputs=filter_textbox,
app_constants.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
+ README = """
4
+ This project compares different large language models and their providers for real time applications and mass data processing.
5
+ While other benchmarks compare LLMs on different human intelligence tasks, this benchmark focuses on features related to business and engineering aspects such as response times, pricing and data streaming capabilities.
6
+
7
+ To perform the evaluation we chose the task of summarizing newspaper articles from the [GEM/xlsum](https://huggingface.co/datasets/GEM/xlsum) dataset, as it represents a very standard type of task where the model has to understand unstructured natural language text, process it and output text in a specified format.
8
+ For this version we chose English and Japanese languages, with Japanese representing languages using logographic alphabets. This also enables us to validate the effectiveness of the LLM for different language groups.
9
+
10
+ Each of the models was asked to summarize the text using the following prompt:
11
+
12
+ ```
13
+ {}
14
+ ```
15
+
16
+ Where {{language}} stands for original language of the text as we wanted to avoid the model translating the text to English during summarization.
17
+
18
+ LLM was asked to return the output in three formats: markdown, json and function call. Note that currently function calls are only supported by Open AI API.
19
+ To do that we added the following text to the query:
20
+
21
+ {}
22
+
23
+ All of the calls were made from the same machine with the same internet connection using the LiteLLM library, which may add some time overhead compared to pure curl calls. Calls were made from Poland, UTC +1.
24
+
25
+ Please take a look at the following project and let us know if you have any questions or suggestions.
26
+ """
27
+
28
+ JS = """
29
+ function test() {
30
+ var google_button = document.querySelector('#google-button')
31
+ var open_ai_button = document.querySelector('#open-ai-button')
32
+ var filter_textbox = document.querySelector('#filter-textbox textarea')
33
+ var filter_button = document.querySelector('#filter-button')
34
+
35
+ console.log(google_button, filter_textbox, filter_button)
36
+ function for_button(button, search_query) {
37
+ button.onclick = function() {
38
+ filter_textbox.value = search_query
39
+
40
+ var input_event = new InputEvent('input', {
41
+ bubbles: true,
42
+ cancelable: true,
43
+ composed: true
44
+ })
45
+ filter_textbox.dispatchEvent(input_event);
46
+ setTimeout(
47
+ ()=>filter_button.click(),
48
+ 1000
49
+ )
50
+ }
51
+ }
52
+ for_button(google_button, "gemini-pro | PaLM 2")
53
+ for_button(open_ai_button, "gpt-4 | gpt-4-turbo | gpt-3.5-turbo")
54
+ }
55
+ """
56
+
57
+ TIME_PERIODS_EXPLANATION_DF = pd.DataFrame(
58
+ {
59
+ "time_of_day": [
60
+ "early morning",
61
+ "morning",
62
+ "afternoon",
63
+ "late afternoon",
64
+ "evening",
65
+ "late evening",
66
+ "midnight",
67
+ "night",
68
+ ],
69
+ "hour_range": ["6-8", "9-11", "12-14", "15-17", "18-20", "21-23", "0-2", "3-5"],
70
+ }
71
+ )
data/combined_plots.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/general_plots.csv CHANGED
@@ -10,7 +10,7 @@ execution_costs,./html/plots/execution_costs.html,"Figure({
10
  'showlegend': True,
11
  'textposition': 'auto',
12
  'type': 'bar',
13
- 'x': array([45.87]),
14
  'xaxis': 'x',
15
  'y': array(['gpt-4'], dtype=object),
16
  'yaxis': 'y'},
@@ -24,261 +24,303 @@ execution_costs,./html/plots/execution_costs.html,"Figure({
24
  'showlegend': True,
25
  'textposition': 'auto',
26
  'type': 'bar',
27
- 'x': array([19.2168]),
28
  'xaxis': 'x',
29
  'y': array(['gpt-4-turbo'], dtype=object),
30
  'yaxis': 'y'},
31
  {'alignmentgroup': 'True',
32
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
33
- 'legendgroup': 'gpt-3.5-turbo',
34
  'marker': {'color': '#00cc96', 'pattern': {'shape': ''}},
35
- 'name': 'gpt-3.5-turbo',
36
- 'offsetgroup': 'gpt-3.5-turbo',
37
  'orientation': 'h',
38
  'showlegend': True,
39
  'textposition': 'auto',
40
  'type': 'bar',
41
- 'x': array([1.75176]),
42
  'xaxis': 'x',
43
- 'y': array(['gpt-3.5-turbo'], dtype=object),
44
  'yaxis': 'y'},
45
  {'alignmentgroup': 'True',
46
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
47
- 'legendgroup': 'llama-2-70b-chat',
48
  'marker': {'color': '#ab63fa', 'pattern': {'shape': ''}},
49
- 'name': 'llama-2-70b-chat',
50
- 'offsetgroup': 'llama-2-70b-chat',
51
  'orientation': 'h',
52
  'showlegend': True,
53
  'textposition': 'auto',
54
  'type': 'bar',
55
- 'x': array([0.65934]),
56
  'xaxis': 'x',
57
- 'y': array(['llama-2-70b-chat'], dtype=object),
58
  'yaxis': 'y'},
59
  {'alignmentgroup': 'True',
60
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
61
- 'legendgroup': 'Mixtral-8x7B-Instruct-v0.1',
62
  'marker': {'color': '#FFA15A', 'pattern': {'shape': ''}},
63
- 'name': 'Mixtral-8x7B-Instruct-v0.1',
64
- 'offsetgroup': 'Mixtral-8x7B-Instruct-v0.1',
65
  'orientation': 'h',
66
  'showlegend': True,
67
  'textposition': 'auto',
68
  'type': 'bar',
69
- 'x': array([0.65934]),
70
  'xaxis': 'x',
71
- 'y': array(['Mixtral-8x7B-Instruct-v0.1'], dtype=object),
72
  'yaxis': 'y'},
73
  {'alignmentgroup': 'True',
74
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
75
- 'legendgroup': '01-ai Yi Chat (34B)',
76
  'marker': {'color': '#19d3f3', 'pattern': {'shape': ''}},
77
- 'name': '01-ai Yi Chat (34B)',
78
- 'offsetgroup': '01-ai Yi Chat (34B)',
79
  'orientation': 'h',
80
  'showlegend': True,
81
  'textposition': 'auto',
82
  'type': 'bar',
83
- 'x': array([0.58184]),
84
  'xaxis': 'x',
85
- 'y': array(['01-ai Yi Chat (34B)'], dtype=object),
86
  'yaxis': 'y'},
87
  {'alignmentgroup': 'True',
88
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
89
- 'legendgroup': 'Snorkel Mistral PairRM DPO (7B)',
90
  'marker': {'color': '#FF6692', 'pattern': {'shape': ''}},
91
- 'name': 'Snorkel Mistral PairRM DPO (7B)',
92
- 'offsetgroup': 'Snorkel Mistral PairRM DPO (7B)',
93
  'orientation': 'h',
94
  'showlegend': True,
95
  'textposition': 'auto',
96
  'type': 'bar',
97
- 'x': array([0.334256]),
98
  'xaxis': 'x',
99
- 'y': array(['Snorkel Mistral PairRM DPO (7B)'], dtype=object),
100
  'yaxis': 'y'},
101
  {'alignmentgroup': 'True',
102
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
103
- 'legendgroup': 'Chronos Hermes (13B)',
104
  'marker': {'color': '#B6E880', 'pattern': {'shape': ''}},
105
- 'name': 'Chronos Hermes (13B)',
106
- 'offsetgroup': 'Chronos Hermes (13B)',
107
  'orientation': 'h',
108
  'showlegend': True,
109
  'textposition': 'auto',
110
  'type': 'bar',
111
- 'x': array([0.27396]),
112
  'xaxis': 'x',
113
- 'y': array(['Chronos Hermes (13B)'], dtype=object),
114
  'yaxis': 'y'},
115
  {'alignmentgroup': 'True',
116
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
117
- 'legendgroup': 'WizardLM v1.2 (13B)',
118
  'marker': {'color': '#FF97FF', 'pattern': {'shape': ''}},
119
- 'name': 'WizardLM v1.2 (13B)',
120
- 'offsetgroup': 'WizardLM v1.2 (13B)',
121
  'orientation': 'h',
122
  'showlegend': True,
123
  'textposition': 'auto',
124
  'type': 'bar',
125
- 'x': array([0.21207]),
126
  'xaxis': 'x',
127
- 'y': array(['WizardLM v1.2 (13B)'], dtype=object),
128
  'yaxis': 'y'},
129
  {'alignmentgroup': 'True',
130
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
131
- 'legendgroup': 'gemini-pro',
132
  'marker': {'color': '#FECB52', 'pattern': {'shape': ''}},
133
- 'name': 'gemini-pro',
134
- 'offsetgroup': 'gemini-pro',
135
  'orientation': 'h',
136
  'showlegend': True,
137
  'textposition': 'auto',
138
  'type': 'bar',
139
- 'x': array([0.18315]),
140
  'xaxis': 'x',
141
- 'y': array(['gemini-pro'], dtype=object),
142
  'yaxis': 'y'},
143
  {'alignmentgroup': 'True',
144
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
145
- 'legendgroup': 'chat-bison (PaLM 2)',
146
  'marker': {'color': '#636efa', 'pattern': {'shape': ''}},
147
- 'name': 'chat-bison (PaLM 2)',
148
- 'offsetgroup': 'chat-bison (PaLM 2)',
149
  'orientation': 'h',
150
  'showlegend': True,
151
  'textposition': 'auto',
152
  'type': 'bar',
153
- 'x': array([0.18315]),
154
  'xaxis': 'x',
155
- 'y': array(['chat-bison (PaLM 2)'], dtype=object),
156
  'yaxis': 'y'},
157
  {'alignmentgroup': 'True',
158
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
159
- 'legendgroup': 'chat-bison-32k (PaLM 2 32K)',
160
  'marker': {'color': '#EF553B', 'pattern': {'shape': ''}},
161
- 'name': 'chat-bison-32k (PaLM 2 32K)',
162
- 'offsetgroup': 'chat-bison-32k (PaLM 2 32K)',
163
  'orientation': 'h',
164
  'showlegend': True,
165
  'textposition': 'auto',
166
  'type': 'bar',
167
- 'x': array([0.18315]),
168
  'xaxis': 'x',
169
- 'y': array(['chat-bison-32k (PaLM 2 32K)'], dtype=object),
170
  'yaxis': 'y'},
171
  {'alignmentgroup': 'True',
172
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
173
- 'legendgroup': 'Upstage SOLAR Instruct v1 (11B)',
174
  'marker': {'color': '#00cc96', 'pattern': {'shape': ''}},
175
- 'name': 'Upstage SOLAR Instruct v1 (11B)',
176
- 'offsetgroup': 'Upstage SOLAR Instruct v1 (11B)',
177
  'orientation': 'h',
178
  'showlegend': True,
179
  'textposition': 'auto',
180
  'type': 'bar',
181
- 'x': array([0.180288]),
182
  'xaxis': 'x',
183
- 'y': array(['Upstage SOLAR Instruct v1 (11B)'], dtype=object),
184
  'yaxis': 'y'},
185
  {'alignmentgroup': 'True',
186
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
187
- 'legendgroup': 'Mistral (7B) Instruct v0.2 (Together AI)',
188
  'marker': {'color': '#ab63fa', 'pattern': {'shape': ''}},
189
- 'name': 'Mistral (7B) Instruct v0.2 (Together AI)',
190
- 'offsetgroup': 'Mistral (7B) Instruct v0.2 (Together AI)',
191
  'orientation': 'h',
192
  'showlegend': True,
193
  'textposition': 'auto',
194
  'type': 'bar',
195
- 'x': array([0.165154]),
196
  'xaxis': 'x',
197
- 'y': array(['Mistral (7B) Instruct v0.2 (Together AI)'], dtype=object),
198
  'yaxis': 'y'},
199
  {'alignmentgroup': 'True',
200
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
201
- 'legendgroup': 'LLaMA-2 Chat (7B)',
202
  'marker': {'color': '#FFA15A', 'pattern': {'shape': ''}},
203
- 'name': 'LLaMA-2 Chat (7B)',
204
- 'offsetgroup': 'LLaMA-2 Chat (7B)',
205
  'orientation': 'h',
206
  'showlegend': True,
207
  'textposition': 'auto',
208
  'type': 'bar',
209
- 'x': array([0.163296]),
210
  'xaxis': 'x',
211
- 'y': array(['LLaMA-2 Chat (7B)'], dtype=object),
212
  'yaxis': 'y'},
213
  {'alignmentgroup': 'True',
214
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
215
- 'legendgroup': 'OpenHermes-2.5-Mistral (7B)',
216
  'marker': {'color': '#19d3f3', 'pattern': {'shape': ''}},
217
- 'name': 'OpenHermes-2.5-Mistral (7B)',
218
- 'offsetgroup': 'OpenHermes-2.5-Mistral (7B)',
219
  'orientation': 'h',
220
  'showlegend': True,
221
  'textposition': 'auto',
222
  'type': 'bar',
223
- 'x': array([0.14182]),
224
  'xaxis': 'x',
225
- 'y': array(['OpenHermes-2.5-Mistral (7B)'], dtype=object),
226
  'yaxis': 'y'},
227
  {'alignmentgroup': 'True',
228
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
229
- 'legendgroup': 'Qwen 1.5 Chat (7B)',
230
  'marker': {'color': '#FF6692', 'pattern': {'shape': ''}},
231
- 'name': 'Qwen 1.5 Chat (7B)',
232
- 'offsetgroup': 'Qwen 1.5 Chat (7B)',
233
  'orientation': 'h',
234
  'showlegend': True,
235
  'textposition': 'auto',
236
  'type': 'bar',
237
- 'x': array([0.137592]),
238
  'xaxis': 'x',
239
- 'y': array(['Qwen 1.5 Chat (7B)'], dtype=object),
240
  'yaxis': 'y'},
241
  {'alignmentgroup': 'True',
242
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
243
- 'legendgroup': 'Vicuna v1.5 (7B)',
244
  'marker': {'color': '#B6E880', 'pattern': {'shape': ''}},
245
- 'name': 'Vicuna v1.5 (7B)',
246
- 'offsetgroup': 'Vicuna v1.5 (7B)',
247
  'orientation': 'h',
248
  'showlegend': True,
249
  'textposition': 'auto',
250
  'type': 'bar',
251
- 'x': array([0.12588]),
252
  'xaxis': 'x',
253
- 'y': array(['Vicuna v1.5 (7B)'], dtype=object),
254
  'yaxis': 'y'},
255
  {'alignmentgroup': 'True',
256
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
257
- 'legendgroup': 'Falcon Instruct (7B)',
258
  'marker': {'color': '#FF97FF', 'pattern': {'shape': ''}},
259
- 'name': 'Falcon Instruct (7B)',
260
- 'offsetgroup': 'Falcon Instruct (7B)',
261
  'orientation': 'h',
262
  'showlegend': True,
263
  'textposition': 'auto',
264
  'type': 'bar',
265
- 'x': array([0.124768]),
266
  'xaxis': 'x',
267
- 'y': array(['Falcon Instruct (7B)'], dtype=object),
268
  'yaxis': 'y'},
269
  {'alignmentgroup': 'True',
270
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
271
- 'legendgroup': 'RedPajama-INCITE Chat (7B)',
272
  'marker': {'color': '#FECB52', 'pattern': {'shape': ''}},
273
- 'name': 'RedPajama-INCITE Chat (7B)',
274
- 'offsetgroup': 'RedPajama-INCITE Chat (7B)',
275
  'orientation': 'h',
276
  'showlegend': True,
277
  'textposition': 'auto',
278
  'type': 'bar',
279
- 'x': array([0.123424]),
280
  'xaxis': 'x',
281
- 'y': array(['RedPajama-INCITE Chat (7B)'], dtype=object),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
282
  'yaxis': 'y'}],
283
  'layout': {'barmode': 'relative',
284
  'legend': {'title': {'text': 'Model'}, 'tracegroupgap': 0},
@@ -286,43 +328,48 @@ execution_costs,./html/plots/execution_costs.html,"Figure({
286
  'title': {'text': 'Costs of execution of 6660 test queries per model'},
287
  'xaxis': {'anchor': 'y', 'domain': [0.0, 1.0], 'title': {'text': 'Execution cost ($)'}},
288
  'yaxis': {'anchor': 'x',
289
- 'categoryarray': [RedPajama-INCITE Chat (7B), Falcon
290
- Instruct (7B), Vicuna v1.5 (7B), Qwen
291
- 1.5 Chat (7B), OpenHermes-2.5-Mistral
292
- (7B), LLaMA-2 Chat (7B), Mistral (7B)
293
- Instruct v0.2 (Together AI), Upstage
294
- SOLAR Instruct v1 (11B), chat-bison-32k
295
- (PaLM 2 32K), chat-bison (PaLM 2),
296
- gemini-pro, WizardLM v1.2 (13B), Chronos
297
- Hermes (13B), Snorkel Mistral PairRM DPO
298
- (7B), 01-ai Yi Chat (34B),
299
- Mixtral-8x7B-Instruct-v0.1,
300
- llama-2-70b-chat, gpt-3.5-turbo,
301
- gpt-4-turbo, gpt-4],
 
 
302
  'categoryorder': 'array',
303
  'domain': [0.0, 1.0],
304
  'title': {'text': 'Model'}}}
305
- })",Costs of execution of 6660 test queries per model,,"{""data"":[{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""gpt-4"",""marker"":{""color"":""#636efa"",""pattern"":{""shape"":""""}},""name"":""gpt-4"",""offsetgroup"":""gpt-4"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[45.870000000000005],""xaxis"":""x"",""y"":[""gpt-4""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""gpt-4-turbo"",""marker"":{""color"":""#EF553B"",""pattern"":{""shape"":""""}},""name"":""gpt-4-turbo"",""offsetgroup"":""gpt-4-turbo"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[19.2168],""xaxis"":""x"",""y"":[""gpt-4-turbo""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""gpt-3.5-turbo"",""marker"":{""color"":""#00cc96"",""pattern"":{""shape"":""""}},""name"":""gpt-3.5-turbo"",""offsetgroup"":""gpt-3.5-turbo"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[1.75176],""xaxis"":""x"",""y"":[""gpt-3.5-turbo""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""llama-2-70b-chat"",""marker"":{""color"":""#ab63fa"",""pattern"":{""shape"":""""}},""name"":""llama-2-70b-chat"",""offsetgroup"":""llama-2-70b-chat"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.65934],""xaxis"":""x"",""y"":[""llama-2-70b-chat""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost 
($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Mixtral-8x7B-Instruct-v0.1"",""marker"":{""color"":""#FFA15A"",""pattern"":{""shape"":""""}},""name"":""Mixtral-8x7B-Instruct-v0.1"",""offsetgroup"":""Mixtral-8x7B-Instruct-v0.1"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.65934],""xaxis"":""x"",""y"":[""Mixtral-8x7B-Instruct-v0.1""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""01-ai Yi Chat (34B)"",""marker"":{""color"":""#19d3f3"",""pattern"":{""shape"":""""}},""name"":""01-ai Yi Chat (34B)"",""offsetgroup"":""01-ai Yi Chat (34B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.5818400000000001],""xaxis"":""x"",""y"":[""01-ai Yi Chat (34B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Snorkel Mistral PairRM DPO (7B)"",""marker"":{""color"":""#FF6692"",""pattern"":{""shape"":""""}},""name"":""Snorkel Mistral PairRM DPO (7B)"",""offsetgroup"":""Snorkel Mistral PairRM DPO (7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.334256],""xaxis"":""x"",""y"":[""Snorkel Mistral PairRM DPO (7B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Chronos Hermes (13B)"",""marker"":{""color"":""#B6E880"",""pattern"":{""shape"":""""}},""name"":""Chronos Hermes (13B)"",""offsetgroup"":""Chronos Hermes (13B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.27396],""xaxis"":""x"",""y"":[""Chronos Hermes 
(13B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""WizardLM v1.2 (13B)"",""marker"":{""color"":""#FF97FF"",""pattern"":{""shape"":""""}},""name"":""WizardLM v1.2 (13B)"",""offsetgroup"":""WizardLM v1.2 (13B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.21207],""xaxis"":""x"",""y"":[""WizardLM v1.2 (13B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""gemini-pro"",""marker"":{""color"":""#FECB52"",""pattern"":{""shape"":""""}},""name"":""gemini-pro"",""offsetgroup"":""gemini-pro"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.18315],""xaxis"":""x"",""y"":[""gemini-pro""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""chat-bison (PaLM 2)"",""marker"":{""color"":""#636efa"",""pattern"":{""shape"":""""}},""name"":""chat-bison (PaLM 2)"",""offsetgroup"":""chat-bison (PaLM 2)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.18315],""xaxis"":""x"",""y"":[""chat-bison (PaLM 2)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""chat-bison-32k (PaLM 2 32K)"",""marker"":{""color"":""#EF553B"",""pattern"":{""shape"":""""}},""name"":""chat-bison-32k (PaLM 2 32K)"",""offsetgroup"":""chat-bison-32k (PaLM 2 32K)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.18315],""xaxis"":""x"",""y"":[""chat-bison-32k (PaLM 2 
32K)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Upstage SOLAR Instruct v1 (11B)"",""marker"":{""color"":""#00cc96"",""pattern"":{""shape"":""""}},""name"":""Upstage SOLAR Instruct v1 (11B)"",""offsetgroup"":""Upstage SOLAR Instruct v1 (11B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.180288],""xaxis"":""x"",""y"":[""Upstage SOLAR Instruct v1 (11B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Mistral (7B) Instruct v0.2 (Together AI)"",""marker"":{""color"":""#ab63fa"",""pattern"":{""shape"":""""}},""name"":""Mistral (7B) Instruct v0.2 (Together AI)"",""offsetgroup"":""Mistral (7B) Instruct v0.2 (Together AI)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.16515400000000002],""xaxis"":""x"",""y"":[""Mistral (7B) Instruct v0.2 (Together AI)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""LLaMA-2 Chat (7B)"",""marker"":{""color"":""#FFA15A"",""pattern"":{""shape"":""""}},""name"":""LLaMA-2 Chat (7B)"",""offsetgroup"":""LLaMA-2 Chat (7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.16329600000000002],""xaxis"":""x"",""y"":[""LLaMA-2 Chat (7B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""OpenHermes-2.5-Mistral (7B)"",""marker"":{""color"":""#19d3f3"",""pattern"":{""shape"":""""}},""name"":""OpenHermes-2.5-Mistral (7B)"",""offsetgroup"":""OpenHermes-2.5-Mistral 
(7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.14182000000000003],""xaxis"":""x"",""y"":[""OpenHermes-2.5-Mistral (7B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Qwen 1.5 Chat (7B)"",""marker"":{""color"":""#FF6692"",""pattern"":{""shape"":""""}},""name"":""Qwen 1.5 Chat (7B)"",""offsetgroup"":""Qwen 1.5 Chat (7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.13759200000000002],""xaxis"":""x"",""y"":[""Qwen 1.5 Chat (7B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Vicuna v1.5 (7B)"",""marker"":{""color"":""#B6E880"",""pattern"":{""shape"":""""}},""name"":""Vicuna v1.5 (7B)"",""offsetgroup"":""Vicuna v1.5 (7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.12588],""xaxis"":""x"",""y"":[""Vicuna v1.5 (7B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Falcon Instruct (7B)"",""marker"":{""color"":""#FF97FF"",""pattern"":{""shape"":""""}},""name"":""Falcon Instruct (7B)"",""offsetgroup"":""Falcon Instruct (7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.12476800000000002],""xaxis"":""x"",""y"":[""Falcon Instruct (7B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""RedPajama-INCITE Chat (7B)"",""marker"":{""color"":""#FECB52"",""pattern"":{""shape"":""""}},""name"":""RedPajama-INCITE Chat (7B)"",""offsetgroup"":""RedPajama-INCITE Chat 
(7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.12342400000000002],""xaxis"":""x"",""y"":[""RedPajama-INCITE Chat (7B)""],""yaxis"":""y"",""type"":""bar""}],""layout"":{""template"":{""data"":{""histogram2dcontour"":[{""type"":""histogram2dcontour"",""colorbar"":{""outlinewidth"":0,""ticks"":""""},""colorscale"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]]}],""choropleth"":[{""type"":""choropleth"",""colorbar"":{""outlinewidth"":0,""ticks"":""""}}],""histogram2d"":[{""type"":""histogram2d"",""colorbar"":{""outlinewidth"":0,""ticks"":""""},""colorscale"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]]}],""heatmap"":[{""type"":""heatmap"",""colorbar"":{""outlinewidth"":0,""ticks"":""""},""colorscale"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]]}],""heatmapgl"":[{""type"":""heatmapgl"",""colorbar"":{""outlinewidth"":0,""ticks"":""""},""colorscale"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]]}],""contourcarpet"":[{""type"":""co
ntourcarpet"",""colorbar"":{""outlinewidth"":0,""ticks"":""""}}],""contour"":[{""type"":""contour"",""colorbar"":{""outlinewidth"":0,""ticks"":""""},""colorscale"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]]}],""surface"":[{""type"":""surface"",""colorbar"":{""outlinewidth"":0,""ticks"":""""},""colorscale"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]]}],""mesh3d"":[{""type"":""mesh3d"",""colorbar"":{""outlinewidth"":0,""ticks"":""""}}],""scatter"":[{""fillpattern"":{""fillmode"":""overlay"",""size"":10,""solidity"":0.2},""type"":""scatter""}],""parcoords"":[{""type"":""parcoords"",""line"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""scatterpolargl"":[{""type"":""scatterpolargl"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""bar"":[{""error_x"":{""color"":""#2a3f5f""},""error_y"":{""color"":""#2a3f5f""},""marker"":{""line"":{""color"":""#E5ECF6"",""width"":0.5},""pattern"":{""fillmode"":""overlay"",""size"":10,""solidity"":0.2}},""type"":""bar""}],""scattergeo"":[{""type"":""scattergeo"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""scatterpolar"":[{""type"":""scatterpolar"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""histogram"":[{""marker"":{""pattern"":{""fillmode"":""overlay"",""size"":10,""solidity"":0.2}},""type"":""histogram""}],""scattergl"":[{""type"":""scattergl"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""scatter3d"":[{""type"":""scatter3d"",""line"":{""col
orbar"":{""outlinewidth"":0,""ticks"":""""}},""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""scattermapbox"":[{""type"":""scattermapbox"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""scatterternary"":[{""type"":""scatterternary"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""scattercarpet"":[{""type"":""scattercarpet"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""carpet"":[{""aaxis"":{""endlinecolor"":""#2a3f5f"",""gridcolor"":""white"",""linecolor"":""white"",""minorgridcolor"":""white"",""startlinecolor"":""#2a3f5f""},""baxis"":{""endlinecolor"":""#2a3f5f"",""gridcolor"":""white"",""linecolor"":""white"",""minorgridcolor"":""white"",""startlinecolor"":""#2a3f5f""},""type"":""carpet""}],""table"":[{""cells"":{""fill"":{""color"":""#EBF0F8""},""line"":{""color"":""white""}},""header"":{""fill"":{""color"":""#C8D4E3""},""line"":{""color"":""white""}},""type"":""table""}],""barpolar"":[{""marker"":{""line"":{""color"":""#E5ECF6"",""width"":0.5},""pattern"":{""fillmode"":""overlay"",""size"":10,""solidity"":0.2}},""type"":""barpolar""}],""pie"":[{""automargin"":true,""type"":""pie""}]},""layout"":{""autotypenumbers"":""strict"",""colorway"":[""#636efa"",""#EF553B"",""#00cc96"",""#ab63fa"",""#FFA15A"",""#19d3f3"",""#FF6692"",""#B6E880"",""#FF97FF"",""#FECB52""],""font"":{""color"":""#2a3f5f""},""hovermode"":""closest"",""hoverlabel"":{""align"":""left""},""paper_bgcolor"":""white"",""plot_bgcolor"":""#E5ECF6"",""polar"":{""bgcolor"":""#E5ECF6"",""angularaxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ticks"":""""},""radialaxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ticks"":""""}},""ternary"":{""bgcolor"":""#E5ECF6"",""aaxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ticks"":""""},""baxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ticks"":""""},""caxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ticks"":""""}},""coloraxis"":{""colorbar"":{"
"outlinewidth"":0,""ticks"":""""}},""colorscale"":{""sequential"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]],""sequentialminus"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]],""diverging"":[[0,""#8e0152""],[0.1,""#c51b7d""],[0.2,""#de77ae""],[0.3,""#f1b6da""],[0.4,""#fde0ef""],[0.5,""#f7f7f7""],[0.6,""#e6f5d0""],[0.7,""#b8e186""],[0.8,""#7fbc41""],[0.9,""#4d9221""],[1,""#276419""]]},""xaxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ticks"":"""",""title"":{""standoff"":15},""zerolinecolor"":""white"",""automargin"":true,""zerolinewidth"":2},""yaxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ticks"":"""",""title"":{""standoff"":15},""zerolinecolor"":""white"",""automargin"":true,""zerolinewidth"":2},""scene"":{""xaxis"":{""backgroundcolor"":""#E5ECF6"",""gridcolor"":""white"",""linecolor"":""white"",""showbackground"":true,""ticks"":"""",""zerolinecolor"":""white"",""gridwidth"":2},""yaxis"":{""backgroundcolor"":""#E5ECF6"",""gridcolor"":""white"",""linecolor"":""white"",""showbackground"":true,""ticks"":"""",""zerolinecolor"":""white"",""gridwidth"":2},""zaxis"":{""backgroundcolor"":""#E5ECF6"",""gridcolor"":""white"",""linecolor"":""white"",""showbackground"":true,""ticks"":"""",""zerolinecolor"":""white"",""gridwidth"":2}},""shapedefaults"":{""line"":{""color"":""#2a3f5f""}},""annotationdefaults"":{""arrowcolor"":""#2a3f5f"",""arrowhead"":0,""arrowwidth"":1},""geo"":{""bgcolor"":""white"",""landcolor"":""#E5ECF6"",""subunitcolor"":""white"
",""showland"":true,""showlakes"":true,""lakecolor"":""white""},""title"":{""x"":0.05},""mapbox"":{""style"":""light""}}},""xaxis"":{""anchor"":""y"",""domain"":[0.0,1.0],""title"":{""text"":""Execution cost ($)""}},""yaxis"":{""anchor"":""x"",""domain"":[0.0,1.0],""title"":{""text"":""Model""},""categoryorder"":""array"",""categoryarray"":[""RedPajama-INCITE Chat (7B)"",""Falcon Instruct (7B)"",""Vicuna v1.5 (7B)"",""Qwen 1.5 Chat (7B)"",""OpenHermes-2.5-Mistral (7B)"",""LLaMA-2 Chat (7B)"",""Mistral (7B) Instruct v0.2 (Together AI)"",""Upstage SOLAR Instruct v1 (11B)"",""chat-bison-32k (PaLM 2 32K)"",""chat-bison (PaLM 2)"",""gemini-pro"",""WizardLM v1.2 (13B)"",""Chronos Hermes (13B)"",""Snorkel Mistral PairRM DPO (7B)"",""01-ai Yi Chat (34B)"",""Mixtral-8x7B-Instruct-v0.1"",""llama-2-70b-chat"",""gpt-3.5-turbo"",""gpt-4-turbo"",""gpt-4""]},""legend"":{""title"":{""text"":""Model""},""tracegroupgap"":0},""title"":{""text"":""Costs of execution of 6660 test queries per model""},""barmode"":""relative""}}","{""y"": ""model"", ""x"": ""model_query_costs"", ""color"": ""model"", ""orientation"": ""h"", ""title"": ""Costs of execution of 6660 test queries per model"", ""labels"": {""model"": ""Model"", ""model_query_costs"": ""Execution cost ($)""}}",",model_query_costs,model
306
- 2,45.870000000000005,gpt-4
307
- 1,19.2168,gpt-4-turbo
308
- 0,1.75176,gpt-3.5-turbo
309
- 3,0.65934,llama-2-70b-chat
310
- 4,0.65934,Mixtral-8x7B-Instruct-v0.1
311
- 11,0.5818400000000001,01-ai Yi Chat (34B)
312
- 43,0.334256,Snorkel Mistral PairRM DPO (7B)
313
- 12,0.27396,Chronos Hermes (13B)
314
- 55,0.21207,WizardLM v1.2 (13B)
315
- 8,0.18315,gemini-pro
316
- 9,0.18315,chat-bison (PaLM 2)
317
- 10,0.18315,chat-bison-32k (PaLM 2 32K)
318
- 56,0.180288,Upstage SOLAR Instruct v1 (11B)
319
- 26,0.16515400000000002,Mistral (7B) Instruct v0.2 (Together AI)
320
- 24,0.16329600000000002,LLaMA-2 Chat (7B)
321
- 46,0.14182000000000003,OpenHermes-2.5-Mistral (7B)
322
- 40,0.13759200000000002,Qwen 1.5 Chat (7B)
323
- 17,0.12588,Vicuna v1.5 (7B)
324
- 48,0.12476800000000002,Falcon Instruct (7B)
325
- 51,0.12342400000000002,RedPajama-INCITE Chat (7B)
 
 
 
326
  "
327
  model_sizes,./html/plots/model_sizes.html,"Figure({
328
  'data': [{'alignmentgroup': 'True',
 
10
  'showlegend': True,
11
  'textposition': 'auto',
12
  'type': 'bar',
13
+ 'x': array([9.1329]),
14
  'xaxis': 'x',
15
  'y': array(['gpt-4'], dtype=object),
16
  'yaxis': 'y'},
 
24
  'showlegend': True,
25
  'textposition': 'auto',
26
  'type': 'bar',
27
+ 'x': array([6.7599]),
28
  'xaxis': 'x',
29
  'y': array(['gpt-4-turbo'], dtype=object),
30
  'yaxis': 'y'},
31
  {'alignmentgroup': 'True',
32
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
33
+ 'legendgroup': 'Mixtral-8x7B-Instruct-v0.1',
34
  'marker': {'color': '#00cc96', 'pattern': {'shape': ''}},
35
+ 'name': 'Mixtral-8x7B-Instruct-v0.1',
36
+ 'offsetgroup': 'Mixtral-8x7B-Instruct-v0.1',
37
  'orientation': 'h',
38
  'showlegend': True,
39
  'textposition': 'auto',
40
  'type': 'bar',
41
+ 'x': array([0.539613]),
42
  'xaxis': 'x',
43
+ 'y': array(['Mixtral-8x7B-Instruct-v0.1'], dtype=object),
44
  'yaxis': 'y'},
45
  {'alignmentgroup': 'True',
46
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
47
+ 'legendgroup': 'zephyr-7b-beta',
48
  'marker': {'color': '#ab63fa', 'pattern': {'shape': ''}},
49
+ 'name': 'zephyr-7b-beta',
50
+ 'offsetgroup': 'zephyr-7b-beta',
51
  'orientation': 'h',
52
  'showlegend': True,
53
  'textposition': 'auto',
54
  'type': 'bar',
55
+ 'x': array([0.49900073]),
56
  'xaxis': 'x',
57
+ 'y': array(['zephyr-7b-beta'], dtype=object),
58
  'yaxis': 'y'},
59
  {'alignmentgroup': 'True',
60
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
61
+ 'legendgroup': '01-ai Yi Chat (34B)',
62
  'marker': {'color': '#FFA15A', 'pattern': {'shape': ''}},
63
+ 'name': '01-ai Yi Chat (34B)',
64
+ 'offsetgroup': '01-ai Yi Chat (34B)',
65
  'orientation': 'h',
66
  'showlegend': True,
67
  'textposition': 'auto',
68
  'type': 'bar',
69
+ 'x': array([0.45192]),
70
  'xaxis': 'x',
71
+ 'y': array(['01-ai Yi Chat (34B)'], dtype=object),
72
  'yaxis': 'y'},
73
  {'alignmentgroup': 'True',
74
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
75
+ 'legendgroup': 'llama-2-70b-chat',
76
  'marker': {'color': '#19d3f3', 'pattern': {'shape': ''}},
77
+ 'name': 'llama-2-70b-chat',
78
+ 'offsetgroup': 'llama-2-70b-chat',
79
  'orientation': 'h',
80
  'showlegend': True,
81
  'textposition': 'auto',
82
  'type': 'bar',
83
+ 'x': array([0.355275]),
84
  'xaxis': 'x',
85
+ 'y': array(['llama-2-70b-chat'], dtype=object),
86
  'yaxis': 'y'},
87
  {'alignmentgroup': 'True',
88
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
89
+ 'legendgroup': 'gpt-3.5-turbo',
90
  'marker': {'color': '#FF6692', 'pattern': {'shape': ''}},
91
+ 'name': 'gpt-3.5-turbo',
92
+ 'offsetgroup': 'gpt-3.5-turbo',
93
  'orientation': 'h',
94
  'showlegend': True,
95
  'textposition': 'auto',
96
  'type': 'bar',
97
+ 'x': array([0.33931]),
98
  'xaxis': 'x',
99
+ 'y': array(['gpt-3.5-turbo'], dtype=object),
100
  'yaxis': 'y'},
101
  {'alignmentgroup': 'True',
102
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
103
+ 'legendgroup': 'Mistral-7B-Instruct-v0.2',
104
  'marker': {'color': '#B6E880', 'pattern': {'shape': ''}},
105
+ 'name': 'Mistral-7B-Instruct-v0.2',
106
+ 'offsetgroup': 'Mistral-7B-Instruct-v0.2',
107
  'orientation': 'h',
108
  'showlegend': True,
109
  'textposition': 'auto',
110
  'type': 'bar',
111
+ 'x': array([0.29065089]),
112
  'xaxis': 'x',
113
+ 'y': array(['Mistral-7B-Instruct-v0.2'], dtype=object),
114
  'yaxis': 'y'},
115
  {'alignmentgroup': 'True',
116
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
117
+ 'legendgroup': 'Snorkel Mistral PairRM DPO (7B)',
118
  'marker': {'color': '#FF97FF', 'pattern': {'shape': ''}},
119
+ 'name': 'Snorkel Mistral PairRM DPO (7B)',
120
+ 'offsetgroup': 'Snorkel Mistral PairRM DPO (7B)',
121
  'orientation': 'h',
122
  'showlegend': True,
123
  'textposition': 'auto',
124
  'type': 'bar',
125
+ 'x': array([0.176236]),
126
  'xaxis': 'x',
127
+ 'y': array(['Snorkel Mistral PairRM DPO (7B)'], dtype=object),
128
  'yaxis': 'y'},
129
  {'alignmentgroup': 'True',
130
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
131
+ 'legendgroup': 'Chronos Hermes (13B)',
132
  'marker': {'color': '#FECB52', 'pattern': {'shape': ''}},
133
+ 'name': 'Chronos Hermes (13B)',
134
+ 'offsetgroup': 'Chronos Hermes (13B)',
135
  'orientation': 'h',
136
  'showlegend': True,
137
  'textposition': 'auto',
138
  'type': 'bar',
139
+ 'x': array([0.158268]),
140
  'xaxis': 'x',
141
+ 'y': array(['Chronos Hermes (13B)'], dtype=object),
142
  'yaxis': 'y'},
143
  {'alignmentgroup': 'True',
144
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
145
+ 'legendgroup': 'WizardLM v1.2 (13B)',
146
  'marker': {'color': '#636efa', 'pattern': {'shape': ''}},
147
+ 'name': 'WizardLM v1.2 (13B)',
148
+ 'offsetgroup': 'WizardLM v1.2 (13B)',
149
  'orientation': 'h',
150
  'showlegend': True,
151
  'textposition': 'auto',
152
  'type': 'bar',
153
+ 'x': array([0.147276]),
154
  'xaxis': 'x',
155
+ 'y': array(['WizardLM v1.2 (13B)'], dtype=object),
156
  'yaxis': 'y'},
157
  {'alignmentgroup': 'True',
158
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
159
+ 'legendgroup': 'Upstage SOLAR Instruct v1 (11B)',
160
  'marker': {'color': '#EF553B', 'pattern': {'shape': ''}},
161
+ 'name': 'Upstage SOLAR Instruct v1 (11B)',
162
+ 'offsetgroup': 'Upstage SOLAR Instruct v1 (11B)',
163
  'orientation': 'h',
164
  'showlegend': True,
165
  'textposition': 'auto',
166
  'type': 'bar',
167
+ 'x': array([0.117306]),
168
  'xaxis': 'x',
169
+ 'y': array(['Upstage SOLAR Instruct v1 (11B)'], dtype=object),
170
  'yaxis': 'y'},
171
  {'alignmentgroup': 'True',
172
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
173
+ 'legendgroup': 'LLaMA-2 Chat (7B)',
174
  'marker': {'color': '#00cc96', 'pattern': {'shape': ''}},
175
+ 'name': 'LLaMA-2 Chat (7B)',
176
+ 'offsetgroup': 'LLaMA-2 Chat (7B)',
177
  'orientation': 'h',
178
  'showlegend': True,
179
  'textposition': 'auto',
180
  'type': 'bar',
181
+ 'x': array([0.11668]),
182
  'xaxis': 'x',
183
+ 'y': array(['LLaMA-2 Chat (7B)'], dtype=object),
184
  'yaxis': 'y'},
185
  {'alignmentgroup': 'True',
186
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
187
+ 'legendgroup': 'Qwen 1.5 Chat (7B)',
188
  'marker': {'color': '#ab63fa', 'pattern': {'shape': ''}},
189
+ 'name': 'Qwen 1.5 Chat (7B)',
190
+ 'offsetgroup': 'Qwen 1.5 Chat (7B)',
191
  'orientation': 'h',
192
  'showlegend': True,
193
  'textposition': 'auto',
194
  'type': 'bar',
195
+ 'x': array([0.10312]),
196
  'xaxis': 'x',
197
+ 'y': array(['Qwen 1.5 Chat (7B)'], dtype=object),
198
  'yaxis': 'y'},
199
  {'alignmentgroup': 'True',
200
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
201
+ 'legendgroup': 'OpenHermes-2.5-Mistral (7B)',
202
  'marker': {'color': '#FFA15A', 'pattern': {'shape': ''}},
203
+ 'name': 'OpenHermes-2.5-Mistral (7B)',
204
+ 'offsetgroup': 'OpenHermes-2.5-Mistral (7B)',
205
  'orientation': 'h',
206
  'showlegend': True,
207
  'textposition': 'auto',
208
  'type': 'bar',
209
+ 'x': array([0.099956]),
210
  'xaxis': 'x',
211
+ 'y': array(['OpenHermes-2.5-Mistral (7B)'], dtype=object),
212
  'yaxis': 'y'},
213
  {'alignmentgroup': 'True',
214
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
215
+ 'legendgroup': 'Vicuna v1.5 (7B)',
216
  'marker': {'color': '#19d3f3', 'pattern': {'shape': ''}},
217
+ 'name': 'Vicuna v1.5 (7B)',
218
+ 'offsetgroup': 'Vicuna v1.5 (7B)',
219
  'orientation': 'h',
220
  'showlegend': True,
221
  'textposition': 'auto',
222
  'type': 'bar',
223
+ 'x': array([0.085688]),
224
  'xaxis': 'x',
225
+ 'y': array(['Vicuna v1.5 (7B)'], dtype=object),
226
  'yaxis': 'y'},
227
  {'alignmentgroup': 'True',
228
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
229
+ 'legendgroup': 'Falcon Instruct (7B)',
230
  'marker': {'color': '#FF6692', 'pattern': {'shape': ''}},
231
+ 'name': 'Falcon Instruct (7B)',
232
+ 'offsetgroup': 'Falcon Instruct (7B)',
233
  'orientation': 'h',
234
  'showlegend': True,
235
  'textposition': 'auto',
236
  'type': 'bar',
237
+ 'x': array([0.08474]),
238
  'xaxis': 'x',
239
+ 'y': array(['Falcon Instruct (7B)'], dtype=object),
240
  'yaxis': 'y'},
241
  {'alignmentgroup': 'True',
242
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
243
+ 'legendgroup': 'RedPajama-INCITE Chat (7B)',
244
  'marker': {'color': '#B6E880', 'pattern': {'shape': ''}},
245
+ 'name': 'RedPajama-INCITE Chat (7B)',
246
+ 'offsetgroup': 'RedPajama-INCITE Chat (7B)',
247
  'orientation': 'h',
248
  'showlegend': True,
249
  'textposition': 'auto',
250
  'type': 'bar',
251
+ 'x': array([0.082008]),
252
  'xaxis': 'x',
253
+ 'y': array(['RedPajama-INCITE Chat (7B)'], dtype=object),
254
  'yaxis': 'y'},
255
  {'alignmentgroup': 'True',
256
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
257
+ 'legendgroup': 'chat-bison (PaLM 2)',
258
  'marker': {'color': '#FF97FF', 'pattern': {'shape': ''}},
259
+ 'name': 'chat-bison (PaLM 2)',
260
+ 'offsetgroup': 'chat-bison (PaLM 2)',
261
  'orientation': 'h',
262
  'showlegend': True,
263
  'textposition': 'auto',
264
  'type': 'bar',
265
+ 'x': array([0.0787475]),
266
  'xaxis': 'x',
267
+ 'y': array(['chat-bison (PaLM 2)'], dtype=object),
268
  'yaxis': 'y'},
269
  {'alignmentgroup': 'True',
270
  'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
271
+ 'legendgroup': 'chat-bison-32k (PaLM 2 32K)',
272
  'marker': {'color': '#FECB52', 'pattern': {'shape': ''}},
273
+ 'name': 'chat-bison-32k (PaLM 2 32K)',
274
+ 'offsetgroup': 'chat-bison-32k (PaLM 2 32K)',
275
  'orientation': 'h',
276
  'showlegend': True,
277
  'textposition': 'auto',
278
  'type': 'bar',
279
+ 'x': array([0.0786175]),
280
  'xaxis': 'x',
281
+ 'y': array(['chat-bison-32k (PaLM 2 32K)'], dtype=object),
282
+ 'yaxis': 'y'},
283
+ {'alignmentgroup': 'True',
284
+ 'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
285
+ 'legendgroup': 'gemini-pro',
286
+ 'marker': {'color': '#636efa', 'pattern': {'shape': ''}},
287
+ 'name': 'gemini-pro',
288
+ 'offsetgroup': 'gemini-pro',
289
+ 'orientation': 'h',
290
+ 'showlegend': True,
291
+ 'textposition': 'auto',
292
+ 'type': 'bar',
293
+ 'x': array([0.0775075]),
294
+ 'xaxis': 'x',
295
+ 'y': array(['gemini-pro'], dtype=object),
296
+ 'yaxis': 'y'},
297
+ {'alignmentgroup': 'True',
298
+ 'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
299
+ 'legendgroup': 'TinyLlama/TinyLlama-1.1B-Chat-v1.0',
300
+ 'marker': {'color': '#EF553B', 'pattern': {'shape': ''}},
301
+ 'name': 'TinyLlama/TinyLlama-1.1B-Chat-v1.0',
302
+ 'offsetgroup': 'TinyLlama/TinyLlama-1.1B-Chat-v1.0',
303
+ 'orientation': 'h',
304
+ 'showlegend': True,
305
+ 'textposition': 'auto',
306
+ 'type': 'bar',
307
+ 'x': array([0.0661168]),
308
+ 'xaxis': 'x',
309
+ 'y': array(['TinyLlama/TinyLlama-1.1B-Chat-v1.0'], dtype=object),
310
+ 'yaxis': 'y'},
311
+ {'alignmentgroup': 'True',
312
+ 'hovertemplate': 'Model=%{y}<br>Execution cost ($)=%{x}<extra></extra>',
313
+ 'legendgroup': 'Mistral (7B) Instruct v0.2 (Together AI)',
314
+ 'marker': {'color': '#00cc96', 'pattern': {'shape': ''}},
315
+ 'name': 'Mistral (7B) Instruct v0.2 (Together AI)',
316
+ 'offsetgroup': 'Mistral (7B) Instruct v0.2 (Together AI)',
317
+ 'orientation': 'h',
318
+ 'showlegend': True,
319
+ 'textposition': 'auto',
320
+ 'type': 'bar',
321
+ 'x': array([0.059762]),
322
+ 'xaxis': 'x',
323
+ 'y': array(['Mistral (7B) Instruct v0.2 (Together AI)'], dtype=object),
324
  'yaxis': 'y'}],
325
  'layout': {'barmode': 'relative',
326
  'legend': {'title': {'text': 'Model'}, 'tracegroupgap': 0},
 
328
  'title': {'text': 'Costs of execution of 6660 test queries per model'},
329
  'xaxis': {'anchor': 'y', 'domain': [0.0, 1.0], 'title': {'text': 'Execution cost ($)'}},
330
  'yaxis': {'anchor': 'x',
331
+ 'categoryarray': [Mistral (7B) Instruct v0.2 (Together
332
+ AI), TinyLlama/TinyLlama-1.1B-Chat-v1.0,
333
+ gemini-pro, chat-bison-32k (PaLM 2 32K),
334
+ chat-bison (PaLM 2), RedPajama-INCITE
335
+ Chat (7B), Falcon Instruct (7B), Vicuna
336
+ v1.5 (7B), OpenHermes-2.5-Mistral (7B),
337
+ Qwen 1.5 Chat (7B), LLaMA-2 Chat (7B),
338
+ Upstage SOLAR Instruct v1 (11B),
339
+ WizardLM v1.2 (13B), Chronos Hermes
340
+ (13B), Snorkel Mistral PairRM DPO (7B),
341
+ Mistral-7B-Instruct-v0.2, gpt-3.5-turbo,
342
+ llama-2-70b-chat, 01-ai Yi Chat (34B),
343
+ zephyr-7b-beta,
344
+ Mixtral-8x7B-Instruct-v0.1, gpt-4-turbo,
345
+ gpt-4],
346
  'categoryorder': 'array',
347
  'domain': [0.0, 1.0],
348
  'title': {'text': 'Model'}}}
349
+ })",Costs of execution of 6660 test queries per model,,"{""data"":[{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""gpt-4"",""marker"":{""color"":""#636efa"",""pattern"":{""shape"":""""}},""name"":""gpt-4"",""offsetgroup"":""gpt-4"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[9.1329],""xaxis"":""x"",""y"":[""gpt-4""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""gpt-4-turbo"",""marker"":{""color"":""#EF553B"",""pattern"":{""shape"":""""}},""name"":""gpt-4-turbo"",""offsetgroup"":""gpt-4-turbo"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[6.7599],""xaxis"":""x"",""y"":[""gpt-4-turbo""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Mixtral-8x7B-Instruct-v0.1"",""marker"":{""color"":""#00cc96"",""pattern"":{""shape"":""""}},""name"":""Mixtral-8x7B-Instruct-v0.1"",""offsetgroup"":""Mixtral-8x7B-Instruct-v0.1"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.539613],""xaxis"":""x"",""y"":[""Mixtral-8x7B-Instruct-v0.1""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost 
($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""zephyr-7b-beta"",""marker"":{""color"":""#ab63fa"",""pattern"":{""shape"":""""}},""name"":""zephyr-7b-beta"",""offsetgroup"":""zephyr-7b-beta"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.49900072815683155],""xaxis"":""x"",""y"":[""zephyr-7b-beta""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""01-ai Yi Chat (34B)"",""marker"":{""color"":""#FFA15A"",""pattern"":{""shape"":""""}},""name"":""01-ai Yi Chat (34B)"",""offsetgroup"":""01-ai Yi Chat (34B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.45192],""xaxis"":""x"",""y"":[""01-ai Yi Chat (34B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""llama-2-70b-chat"",""marker"":{""color"":""#19d3f3"",""pattern"":{""shape"":""""}},""name"":""llama-2-70b-chat"",""offsetgroup"":""llama-2-70b-chat"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.355275],""xaxis"":""x"",""y"":[""llama-2-70b-chat""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""gpt-3.5-turbo"",""marker"":{""color"":""#FF6692"",""pattern"":{""shape"":""""}},""name"":""gpt-3.5-turbo"",""offsetgroup"":""gpt-3.5-turbo"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.33931],""xaxis"":""x"",""y"":[""gpt-3.5-turbo""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost 
($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Mistral-7B-Instruct-v0.2"",""marker"":{""color"":""#B6E880"",""pattern"":{""shape"":""""}},""name"":""Mistral-7B-Instruct-v0.2"",""offsetgroup"":""Mistral-7B-Instruct-v0.2"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.29065088506539666],""xaxis"":""x"",""y"":[""Mistral-7B-Instruct-v0.2""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Snorkel Mistral PairRM DPO (7B)"",""marker"":{""color"":""#FF97FF"",""pattern"":{""shape"":""""}},""name"":""Snorkel Mistral PairRM DPO (7B)"",""offsetgroup"":""Snorkel Mistral PairRM DPO (7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.176236],""xaxis"":""x"",""y"":[""Snorkel Mistral PairRM DPO (7B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Chronos Hermes (13B)"",""marker"":{""color"":""#FECB52"",""pattern"":{""shape"":""""}},""name"":""Chronos Hermes (13B)"",""offsetgroup"":""Chronos Hermes (13B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.158268],""xaxis"":""x"",""y"":[""Chronos Hermes (13B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""WizardLM v1.2 (13B)"",""marker"":{""color"":""#636efa"",""pattern"":{""shape"":""""}},""name"":""WizardLM v1.2 (13B)"",""offsetgroup"":""WizardLM v1.2 (13B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.147276],""xaxis"":""x"",""y"":[""WizardLM v1.2 
(13B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Upstage SOLAR Instruct v1 (11B)"",""marker"":{""color"":""#EF553B"",""pattern"":{""shape"":""""}},""name"":""Upstage SOLAR Instruct v1 (11B)"",""offsetgroup"":""Upstage SOLAR Instruct v1 (11B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.117306],""xaxis"":""x"",""y"":[""Upstage SOLAR Instruct v1 (11B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""LLaMA-2 Chat (7B)"",""marker"":{""color"":""#00cc96"",""pattern"":{""shape"":""""}},""name"":""LLaMA-2 Chat (7B)"",""offsetgroup"":""LLaMA-2 Chat (7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.11668],""xaxis"":""x"",""y"":[""LLaMA-2 Chat (7B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Qwen 1.5 Chat (7B)"",""marker"":{""color"":""#ab63fa"",""pattern"":{""shape"":""""}},""name"":""Qwen 1.5 Chat (7B)"",""offsetgroup"":""Qwen 1.5 Chat (7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.10311999999999999],""xaxis"":""x"",""y"":[""Qwen 1.5 Chat (7B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""OpenHermes-2.5-Mistral (7B)"",""marker"":{""color"":""#FFA15A"",""pattern"":{""shape"":""""}},""name"":""OpenHermes-2.5-Mistral (7B)"",""offsetgroup"":""OpenHermes-2.5-Mistral 
(7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.09995599999999999],""xaxis"":""x"",""y"":[""OpenHermes-2.5-Mistral (7B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Vicuna v1.5 (7B)"",""marker"":{""color"":""#19d3f3"",""pattern"":{""shape"":""""}},""name"":""Vicuna v1.5 (7B)"",""offsetgroup"":""Vicuna v1.5 (7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.085688],""xaxis"":""x"",""y"":[""Vicuna v1.5 (7B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Falcon Instruct (7B)"",""marker"":{""color"":""#FF6692"",""pattern"":{""shape"":""""}},""name"":""Falcon Instruct (7B)"",""offsetgroup"":""Falcon Instruct (7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.08474],""xaxis"":""x"",""y"":[""Falcon Instruct (7B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""RedPajama-INCITE Chat (7B)"",""marker"":{""color"":""#B6E880"",""pattern"":{""shape"":""""}},""name"":""RedPajama-INCITE Chat (7B)"",""offsetgroup"":""RedPajama-INCITE Chat (7B)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.082008],""xaxis"":""x"",""y"":[""RedPajama-INCITE Chat (7B)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""chat-bison (PaLM 2)"",""marker"":{""color"":""#FF97FF"",""pattern"":{""shape"":""""}},""name"":""chat-bison (PaLM 2)"",""offsetgroup"":""chat-bison (PaLM 
2)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.0787475],""xaxis"":""x"",""y"":[""chat-bison (PaLM 2)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""chat-bison-32k (PaLM 2 32K)"",""marker"":{""color"":""#FECB52"",""pattern"":{""shape"":""""}},""name"":""chat-bison-32k (PaLM 2 32K)"",""offsetgroup"":""chat-bison-32k (PaLM 2 32K)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.07861749999999999],""xaxis"":""x"",""y"":[""chat-bison-32k (PaLM 2 32K)""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""gemini-pro"",""marker"":{""color"":""#636efa"",""pattern"":{""shape"":""""}},""name"":""gemini-pro"",""offsetgroup"":""gemini-pro"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.0775075],""xaxis"":""x"",""y"":[""gemini-pro""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""TinyLlama\u002fTinyLlama-1.1B-Chat-v1.0"",""marker"":{""color"":""#EF553B"",""pattern"":{""shape"":""""}},""name"":""TinyLlama\u002fTinyLlama-1.1B-Chat-v1.0"",""offsetgroup"":""TinyLlama\u002fTinyLlama-1.1B-Chat-v1.0"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.06611679673194885],""xaxis"":""x"",""y"":[""TinyLlama\u002fTinyLlama-1.1B-Chat-v1.0""],""yaxis"":""y"",""type"":""bar""},{""alignmentgroup"":""True"",""hovertemplate"":""Model=%{y}\u003cbr\u003eExecution cost ($)=%{x}\u003cextra\u003e\u003c\u002fextra\u003e"",""legendgroup"":""Mistral (7B) Instruct v0.2 (Together 
AI)"",""marker"":{""color"":""#00cc96"",""pattern"":{""shape"":""""}},""name"":""Mistral (7B) Instruct v0.2 (Together AI)"",""offsetgroup"":""Mistral (7B) Instruct v0.2 (Together AI)"",""orientation"":""h"",""showlegend"":true,""textposition"":""auto"",""x"":[0.059761999999999996],""xaxis"":""x"",""y"":[""Mistral (7B) Instruct v0.2 (Together AI)""],""yaxis"":""y"",""type"":""bar""}],""layout"":{""template"":{""data"":{""histogram2dcontour"":[{""type"":""histogram2dcontour"",""colorbar"":{""outlinewidth"":0,""ticks"":""""},""colorscale"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]]}],""choropleth"":[{""type"":""choropleth"",""colorbar"":{""outlinewidth"":0,""ticks"":""""}}],""histogram2d"":[{""type"":""histogram2d"",""colorbar"":{""outlinewidth"":0,""ticks"":""""},""colorscale"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]]}],""heatmap"":[{""type"":""heatmap"",""colorbar"":{""outlinewidth"":0,""ticks"":""""},""colorscale"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]]}],""heatmapgl"":[{""type"":""heatmapgl"",""colorbar"":{""outlinewidth"":0,""ticks"":""""},""colorscale"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3
786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]]}],""contourcarpet"":[{""type"":""contourcarpet"",""colorbar"":{""outlinewidth"":0,""ticks"":""""}}],""contour"":[{""type"":""contour"",""colorbar"":{""outlinewidth"":0,""ticks"":""""},""colorscale"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]]}],""surface"":[{""type"":""surface"",""colorbar"":{""outlinewidth"":0,""ticks"":""""},""colorscale"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]]}],""mesh3d"":[{""type"":""mesh3d"",""colorbar"":{""outlinewidth"":0,""ticks"":""""}}],""scatter"":[{""fillpattern"":{""fillmode"":""overlay"",""size"":10,""solidity"":0.2},""type"":""scatter""}],""parcoords"":[{""type"":""parcoords"",""line"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""scatterpolargl"":[{""type"":""scatterpolargl"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""bar"":[{""error_x"":{""color"":""#2a3f5f""},""error_y"":{""color"":""#2a3f5f""},""marker"":{""line"":{""color"":""#E5ECF6"",""width"":0.5},""pattern"":{""fillmode"":""overlay"",""size"":10,""solidity"":0.2}},""type"":""bar""}],""scattergeo"":[{""type"":""scattergeo"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""scatterpolar"":[{""type"":""scatterpolar"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""histogram"":[{""marker"":{""pattern"":{""fillmode"":""overlay"",""size"":10,""solidi
ty"":0.2}},""type"":""histogram""}],""scattergl"":[{""type"":""scattergl"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""scatter3d"":[{""type"":""scatter3d"",""line"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}},""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""scattermapbox"":[{""type"":""scattermapbox"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""scatterternary"":[{""type"":""scatterternary"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""scattercarpet"":[{""type"":""scattercarpet"",""marker"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}}}],""carpet"":[{""aaxis"":{""endlinecolor"":""#2a3f5f"",""gridcolor"":""white"",""linecolor"":""white"",""minorgridcolor"":""white"",""startlinecolor"":""#2a3f5f""},""baxis"":{""endlinecolor"":""#2a3f5f"",""gridcolor"":""white"",""linecolor"":""white"",""minorgridcolor"":""white"",""startlinecolor"":""#2a3f5f""},""type"":""carpet""}],""table"":[{""cells"":{""fill"":{""color"":""#EBF0F8""},""line"":{""color"":""white""}},""header"":{""fill"":{""color"":""#C8D4E3""},""line"":{""color"":""white""}},""type"":""table""}],""barpolar"":[{""marker"":{""line"":{""color"":""#E5ECF6"",""width"":0.5},""pattern"":{""fillmode"":""overlay"",""size"":10,""solidity"":0.2}},""type"":""barpolar""}],""pie"":[{""automargin"":true,""type"":""pie""}]},""layout"":{""autotypenumbers"":""strict"",""colorway"":[""#636efa"",""#EF553B"",""#00cc96"",""#ab63fa"",""#FFA15A"",""#19d3f3"",""#FF6692"",""#B6E880"",""#FF97FF"",""#FECB52""],""font"":{""color"":""#2a3f5f""},""hovermode"":""closest"",""hoverlabel"":{""align"":""left""},""paper_bgcolor"":""white"",""plot_bgcolor"":""#E5ECF6"",""polar"":{""bgcolor"":""#E5ECF6"",""angularaxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ticks"":""""},""radialaxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ticks"":""""}},""ternary"":{""bgcolor"":""#E5ECF6"",""aaxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ti
cks"":""""},""baxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ticks"":""""},""caxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ticks"":""""}},""coloraxis"":{""colorbar"":{""outlinewidth"":0,""ticks"":""""}},""colorscale"":{""sequential"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]],""sequentialminus"":[[0.0,""#0d0887""],[0.1111111111111111,""#46039f""],[0.2222222222222222,""#7201a8""],[0.3333333333333333,""#9c179e""],[0.4444444444444444,""#bd3786""],[0.5555555555555556,""#d8576b""],[0.6666666666666666,""#ed7953""],[0.7777777777777778,""#fb9f3a""],[0.8888888888888888,""#fdca26""],[1.0,""#f0f921""]],""diverging"":[[0,""#8e0152""],[0.1,""#c51b7d""],[0.2,""#de77ae""],[0.3,""#f1b6da""],[0.4,""#fde0ef""],[0.5,""#f7f7f7""],[0.6,""#e6f5d0""],[0.7,""#b8e186""],[0.8,""#7fbc41""],[0.9,""#4d9221""],[1,""#276419""]]},""xaxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ticks"":"""",""title"":{""standoff"":15},""zerolinecolor"":""white"",""automargin"":true,""zerolinewidth"":2},""yaxis"":{""gridcolor"":""white"",""linecolor"":""white"",""ticks"":"""",""title"":{""standoff"":15},""zerolinecolor"":""white"",""automargin"":true,""zerolinewidth"":2},""scene"":{""xaxis"":{""backgroundcolor"":""#E5ECF6"",""gridcolor"":""white"",""linecolor"":""white"",""showbackground"":true,""ticks"":"""",""zerolinecolor"":""white"",""gridwidth"":2},""yaxis"":{""backgroundcolor"":""#E5ECF6"",""gridcolor"":""white"",""linecolor"":""white"",""showbackground"":true,""ticks"":"""",""zerolinecolor"":""white"",""gridwidth"":2},""zaxis"":{""backgroundcolor"":""#E5ECF6"",""gridcolor"":""white"",""linecolor"":""white"",""showbackground"":true,""ticks"":"""",""zerolinecolor"":""white"",""gridwidth"":2}},""shapedefaults"":{""line""
:{""color"":""#2a3f5f""}},""annotationdefaults"":{""arrowcolor"":""#2a3f5f"",""arrowhead"":0,""arrowwidth"":1},""geo"":{""bgcolor"":""white"",""landcolor"":""#E5ECF6"",""subunitcolor"":""white"",""showland"":true,""showlakes"":true,""lakecolor"":""white""},""title"":{""x"":0.05},""mapbox"":{""style"":""light""}}},""xaxis"":{""anchor"":""y"",""domain"":[0.0,1.0],""title"":{""text"":""Execution cost ($)""}},""yaxis"":{""anchor"":""x"",""domain"":[0.0,1.0],""title"":{""text"":""Model""},""categoryorder"":""array"",""categoryarray"":[""Mistral (7B) Instruct v0.2 (Together AI)"",""TinyLlama\u002fTinyLlama-1.1B-Chat-v1.0"",""gemini-pro"",""chat-bison-32k (PaLM 2 32K)"",""chat-bison (PaLM 2)"",""RedPajama-INCITE Chat (7B)"",""Falcon Instruct (7B)"",""Vicuna v1.5 (7B)"",""OpenHermes-2.5-Mistral (7B)"",""Qwen 1.5 Chat (7B)"",""LLaMA-2 Chat (7B)"",""Upstage SOLAR Instruct v1 (11B)"",""WizardLM v1.2 (13B)"",""Chronos Hermes (13B)"",""Snorkel Mistral PairRM DPO (7B)"",""Mistral-7B-Instruct-v0.2"",""gpt-3.5-turbo"",""llama-2-70b-chat"",""01-ai Yi Chat (34B)"",""zephyr-7b-beta"",""Mixtral-8x7B-Instruct-v0.1"",""gpt-4-turbo"",""gpt-4""]},""legend"":{""title"":{""text"":""Model""},""tracegroupgap"":0},""title"":{""text"":""Costs of execution of 6660 test queries per model""},""barmode"":""relative""}}","{""y"": ""model"", ""x"": ""model_query_costs"", ""color"": ""model"", ""orientation"": ""h"", ""title"": ""Costs of execution of 6660 test queries per model"", ""labels"": {""model"": ""Model"", ""model_query_costs"": ""Execution cost ($)""}}",",model_query_costs,model
350
+ 2,9.1329,gpt-4
351
+ 1,6.7599,gpt-4-turbo
352
+ 4,0.539613,Mixtral-8x7B-Instruct-v0.1
353
+ 5,0.49900072815683155,zephyr-7b-beta
354
+ 11,0.45192,01-ai Yi Chat (34B)
355
+ 3,0.355275,llama-2-70b-chat
356
+ 0,0.33931,gpt-3.5-turbo
357
+ 6,0.29065088506539666,Mistral-7B-Instruct-v0.2
358
+ 43,0.176236,Snorkel Mistral PairRM DPO (7B)
359
+ 12,0.158268,Chronos Hermes (13B)
360
+ 55,0.147276,WizardLM v1.2 (13B)
361
+ 56,0.117306,Upstage SOLAR Instruct v1 (11B)
362
+ 24,0.11668,LLaMA-2 Chat (7B)
363
+ 40,0.10311999999999999,Qwen 1.5 Chat (7B)
364
+ 46,0.09995599999999999,OpenHermes-2.5-Mistral (7B)
365
+ 17,0.085688,Vicuna v1.5 (7B)
366
+ 48,0.08474,Falcon Instruct (7B)
367
+ 51,0.082008,RedPajama-INCITE Chat (7B)
368
+ 9,0.0787475,chat-bison (PaLM 2)
369
+ 10,0.07861749999999999,chat-bison-32k (PaLM 2 32K)
370
+ 8,0.0775075,gemini-pro
371
+ 7,0.06611679673194885,TinyLlama/TinyLlama-1.1B-Chat-v1.0
372
+ 26,0.059761999999999996,Mistral (7B) Instruct v0.2 (Together AI)
373
  "
374
  model_sizes,./html/plots/model_sizes.html,"Figure({
375
  'data': [{'alignmentgroup': 'True',
data/summary_metrics_plots.csv ADDED
The diff for this file is too large to render. See raw diff
 
pipeline/config.py CHANGED
@@ -64,3 +64,12 @@ class GeneralPlotConfig(Config):
64
  seconds_per_token: float = 184 / 6
65
  input_size: int = 100
66
  expected_output_size: int = 50
 
 
 
 
 
 
 
 
 
 
64
  seconds_per_token: float = 184 / 6
65
  input_size: int = 100
66
  expected_output_size: int = 50
67
+
68
+
69
+ class CombinedPlotsConfig(Config):
70
+ plots_dir: str = "./html/plots/"
71
+ saving_path: str = "data/"
72
+ scatter_plots: bool = False
73
+
74
+ class SummaryMetricsConfig(Config):
75
+ combined_score: bool = False