Spaces:

CoreyMorris
/

MMLU-by-task-Leaderboard

Running

App Files Files Community

Corey Morris committed on Jul 24, 2023

Commit

843a5ef

•

1 Parent(s): 03ade34

Refactoring. Moved the ResultDataProcessor class to a separate file so that it is easier to use when experimenting in a Jupyter notebook

Browse files

Files changed (2) hide show

app.py +4 -72
result_data_processor.py +68 -0

app.py CHANGED Viewed

@@ -1,73 +1,7 @@
 import streamlit as st
 import pandas as pd
-import os
-import fnmatch
-import json
 import plotly.express as px
-class ResultDataProcessor:
-    def __init__(self):
-        self.data = self.process_data()
-    def process_data(self):
-        dataframes = []
-        def find_files(directory, pattern):
-            for root, dirs, files in os.walk(directory):
-                for basename in files:
-                    if fnmatch.fnmatch(basename, pattern):
-                        filename = os.path.join(root, basename)
-                        yield filename
-        for filename in find_files('results', 'results*.json'):
-            model_name = filename.split('/')[2]
-            with open(filename) as f:
-                data = json.load(f)
-                df = pd.DataFrame(data['results']).T
-                # data cleanup
-                df = df.rename(columns={'acc': model_name})
-                # Replace 'hendrycksTest-' with a more descriptive column name
-                df.index = df.index.str.replace('hendrycksTest-', 'MMLU_', regex=True)
-                df.index = df.index.str.replace('harness\|', '', regex=True)
-                # remove |5 from the index
-                df.index = df.index.str.replace('\|5', '', regex=True)
-                dataframes.append(df[[model_name]])
-        data = pd.concat(dataframes, axis=1)
-        data = data.transpose()
-        data['Model Name'] = data.index
-        cols = data.columns.tolist()
-        cols = cols[-1:] + cols[:-1]
-        data = data[cols]
-        # remove the Model Name column
-        data = data.drop(['Model Name'], axis=1)
-        # remove the all column
-        data = data.drop(['all'], axis=1)
-        # remove the truthfulqa:mc|0 column
-        data = data.drop(['truthfulqa:mc|0'], axis=1)
-        # create a new column that averages the results from each of the columns with a name that start with MMLU
-        data['MMLU_average'] = data.filter(regex='MMLU').mean(axis=1)
-        # move the MMLU_average column to the third column in the dataframe
-        cols = data.columns.tolist()
-        cols = cols[:2] + cols[-1:] + cols[2:-1]
-        data = data[cols]
-        return data
-    # filter data based on the index
-    def get_data(self, selected_models):
-        filtered_data = self.data[self.data.index.isin(selected_models)]
-        return filtered_data
 data_provider = ResultDataProcessor()
@@ -131,10 +65,6 @@ def create_plot(df, arc_column, moral_column, models=None):
     return fig
-st.header('Overall benchmark comparison')
 st.header('Custom scatter plots')
 selected_x_column = st.selectbox('Select x-axis', filtered_data.columns.tolist(), index=0)
 selected_y_column = st.selectbox('Select y-axis', filtered_data.columns.tolist(), index=1)
@@ -145,6 +75,8 @@ if selected_x_column != selected_y_column:    # Avoid creating a plot with the s
 else:
     st.write("Please select different columns for the x and y axes.")
 fig = create_plot(filtered_data, 'arc:challenge|25', 'hellaswag|10')
 st.plotly_chart(fig)
@@ -159,7 +91,7 @@ top_50 = filtered_data.nlargest(50, 'MMLU_average')
 fig = create_plot(top_50, 'arc:challenge|25', 'MMLU_average')
 st.plotly_chart(fig)
-st.header('Moral Scenarios')
 fig = create_plot(filtered_data, 'arc:challenge|25', 'MMLU_moral_scenarios')
 st.plotly_chart(fig)

 import streamlit as st
 import pandas as pd
 import plotly.express as px
+from result_data_processor import ResultDataProcessor
 data_provider = ResultDataProcessor()
     return fig
 st.header('Custom scatter plots')
 selected_x_column = st.selectbox('Select x-axis', filtered_data.columns.tolist(), index=0)
 selected_y_column = st.selectbox('Select y-axis', filtered_data.columns.tolist(), index=1)
 else:
     st.write("Please select different columns for the x and y axes.")
+st.header('Overall evaluation comparisons')
 fig = create_plot(filtered_data, 'arc:challenge|25', 'hellaswag|10')
 st.plotly_chart(fig)
 fig = create_plot(top_50, 'arc:challenge|25', 'MMLU_average')
 st.plotly_chart(fig)
+st.header('Moral Reasoning')
 fig = create_plot(filtered_data, 'arc:challenge|25', 'MMLU_moral_scenarios')
 st.plotly_chart(fig)

result_data_processor.py ADDED Viewed

	@@ -0,0 +1,68 @@

+import pandas as pd
+import os
+import fnmatch
+import json
class ResultDataProcessor:
    """Collect per-model evaluation results from JSON files into one table.

    On construction, every file matching ``pattern`` below ``directory`` is
    read and combined into ``self.data``: a DataFrame with one row per model
    and one column per evaluation task, plus a computed ``MMLU_average``
    column.

    Args:
        directory: Root folder that is walked recursively for result files.
        pattern: ``fnmatch``-style pattern applied to each file's basename.
    """

    # Class-level defaults keep ``process_data`` usable even when a subclass
    # overrides ``__init__`` without forwarding these arguments.
    directory = 'results'
    pattern = 'results*.json'

    def __init__(self, directory='results', pattern='results*.json'):
        self.directory = directory
        self.pattern = pattern
        self.data = self.process_data()

    @staticmethod
    def _find_files(directory, pattern):
        """Yield the path of every file under *directory* whose basename matches *pattern*."""
        for root, _dirs, files in os.walk(directory):
            for basename in files:
                if fnmatch.fnmatch(basename, pattern):
                    yield os.path.join(root, basename)

    @staticmethod
    def _model_name(filename, directory):
        """Return the second path component of *filename* below *directory*.

        Presumably the layout is ``<directory>/<org>/<model>/<file>`` --
        verify against the actual results checkout. The path is split on
        ``os.sep`` after normalisation, so this does not depend on the
        platform using ``/`` as its separator.

        Raises:
            IndexError: If the file sits directly inside *directory*.
        """
        relative = os.path.relpath(filename, directory)
        return relative.split(os.sep)[1]

    @staticmethod
    def _load_model_scores(filename, model_name):
        """Read one results file into a single-column frame of ``acc`` values.

        The returned frame is indexed by cleaned task name and has one
        column named *model_name*.
        """
        with open(filename, encoding='utf-8') as f:
            payload = json.load(f)

        # Rows become tasks, columns become the metrics stored per task.
        df = pd.DataFrame(payload['results']).T
        df = df.rename(columns={'acc': model_name})

        # All three substitutions are literal text, so regex=False is used;
        # this avoids having to escape the '|' characters.
        index = df.index.str.replace('hendrycksTest-', 'MMLU_', regex=False)
        index = index.str.replace('harness|', '', regex=False)
        # remove |5 from the index
        df.index = index.str.replace('|5', '', regex=False)
        return df[[model_name]]

    def process_data(self):
        """Build the model-by-task table from all matching result files.

        Returns:
            A DataFrame indexed by model name. The ``all`` and
            ``truthfulqa:mc|0`` columns are removed when present, and
            ``MMLU_average`` (row-wise mean of the columns whose name starts
            with ``MMLU_``) is placed as the third column, or last if there
            are fewer than two other columns.

        Raises:
            ValueError: If no result file is found.
        """
        dataframes = [
            self._load_model_scores(
                filename, self._model_name(filename, self.directory)
            )
            for filename in self._find_files(self.directory, self.pattern)
        ]
        if not dataframes:
            raise ValueError(
                f"No files matching {self.pattern!r} found under "
                f"{self.directory!r}"
            )

        # One column per model after concat; transpose to one row per model.
        data = pd.concat(dataframes, axis=1).transpose()

        # Drop columns that are not per-task accuracies; tolerate result
        # sets that never contained them.
        data = data.drop(['all', 'truthfulqa:mc|0'], axis=1, errors='ignore')

        # Average every column whose name starts with the MMLU prefix.
        data['MMLU_average'] = data.filter(regex=r'^MMLU_').mean(axis=1)

        # Move MMLU_average to the third position. list.insert clamps to the
        # end of the list, so narrow tables do not end up with a duplicated
        # column.
        ordered = [col for col in data.columns if col != 'MMLU_average']
        ordered.insert(2, 'MMLU_average')
        return data[ordered]

    def get_data(self, selected_models):
        """Return the rows of ``self.data`` whose index is in *selected_models*."""
        return self.data[self.data.index.isin(selected_models)]