Spaces:

llm-council
/

emotional-intelligence-arena

Running

App Files Community

justinxzhao committed on Jun 12

Commit

a129336

•

1 Parent(s): ae3759c

Add analysis graphs, and add color coding to interpersonal conflicts data samples.

Browse files

Files changed (5) hide show

app.py +80 -22
img/council_normalized.png +0 -0
img/judge_agreement.sidewise_cohen_kappa.png +0 -0
img/llm_vs_llm_win_rates.png +0 -0
img/raw.png +0 -0

app.py CHANGED Viewed

@@ -51,14 +51,6 @@ def pil_to_base64(img):
     return img_str
-# Function to convert PIL image to base64
-def pil_svg_to_base64(img):
-    buffered = BytesIO()
-    img.save(buffered, format="SVG")
-    img_str = base64.b64encode(buffered.getvalue()).decode()
-    return img_str
 # Load your dataframes
 df_test_set = pd.read_json("data/test_set.jsonl", lines=True)
 df_responses = pd.read_json("data/responses.jsonl", lines=True)
@@ -82,7 +74,7 @@ model_options = df_responses["llm_responder"].unique().tolist()
 # Prepare the judge selector options
 judge_options = df_response_judging["llm_judge"].unique().tolist()
-st.set_page_config(page_title="Language Model Council", page_icon="🧊", layout="wide")
 # Create three columns
 col1, col2, col3 = st.columns(3)
@@ -142,7 +134,7 @@ st.markdown(center_css, unsafe_allow_html=True)
 # st.markdown(centered_image_html, unsafe_allow_html=True)
 # Title and subtitle.
-st.title("Language Model Council")
 st.markdown(
     "###### Benchmarking Foundation Models on Highly Subjective Tasks by Consensus :classical_building:"
 )
@@ -179,11 +171,19 @@ st.markdown(
 )
 # Create horizontal tabs
-tabs = st.tabs(["Leaderboard Results", "Data Samples", "About Us"])
 # Define content for each tab
 with tabs[0]:
-    st.dataframe(df_leaderboard)
 # HTML and CSS to create a text box with specified color
@@ -193,7 +193,7 @@ def colored_text_box(text, background_color, text_color="black"):
         background-color: {background_color};
         color: {text_color};
         padding: 10px;
-        border-radius: 5px;
         ">
         {text}
     </div>
@@ -263,15 +263,21 @@ with tabs[1]:
         # Display the detailed dilemma and additional information
         st.markdown(
             colored_text_box(
-                scenario_details["detailed_dilemma"], "#eeeeeeff", "black"
             ),
             unsafe_allow_html=True,
         )
         with st.expander("Additional Information"):
-            st.write(f"**LLM Author:** {scenario_details['llm_author']}")
-            st.write(f"**Problem:** {scenario_details['problem']}")
-            st.write(f"**Relationship:** {scenario_details['relationship']}")
-            st.write(f"**Scenario:** {scenario_details['scenario']}")
     st.divider()
@@ -296,7 +302,9 @@ with tabs[1]:
             # Display the response string
             st.markdown(
                 colored_text_box(
-                    response_details_fixed["response_string"], "#eeeeeeff", "black"
                 ),
                 unsafe_allow_html=True,
             )
@@ -324,7 +332,9 @@ with tabs[1]:
             # Display the response string
             st.markdown(
                 colored_text_box(
-                    response_details_dynamic["response_string"], "#eeeeeeff", "black"
                 ),
                 unsafe_allow_html=True,
             )
@@ -414,7 +424,7 @@ with tabs[1]:
                 st.markdown(
                     colored_text_box(
                         judging_details_left["judging_response_string"],
-                        "#eeeeeeff",
                         "black",
                     ),
                     unsafe_allow_html=True,
@@ -430,7 +440,7 @@ with tabs[1]:
                 st.markdown(
                     colored_text_box(
                         judging_details_right["judging_response_string"],
-                        "#eeeeeeff",
                         "black",
                     ),
                     unsafe_allow_html=True,
@@ -439,6 +449,54 @@ with tabs[1]:
                 st.write("No judging details found for the selected combination.")
 with tabs[2]:
     st.write(
         """
     Please reach out if you are interested in collaborating!

     return img_str
 # Load your dataframes
 df_test_set = pd.read_json("data/test_set.jsonl", lines=True)
 df_responses = pd.read_json("data/responses.jsonl", lines=True)
 # Prepare the judge selector options
 judge_options = df_response_judging["llm_judge"].unique().tolist()
+st.set_page_config(page_title="Language Model Council", page_icon="🏛️", layout="wide")
 # Create three columns
 col1, col2, col3 = st.columns(3)
 # st.markdown(centered_image_html, unsafe_allow_html=True)
 # Title and subtitle.
+st.title("🗳️ Language Model Council")
 st.markdown(
     "###### Benchmarking Foundation Models on Highly Subjective Tasks by Consensus :classical_building:"
 )
 )
 # Create horizontal tabs
+tabs = st.tabs(
+    [
+        "Leaderboard Results",
+        "Interpersonal Conflicts",
+        "Analysis",
+        "About Us",
+    ]
+)
 # Define content for each tab
 with tabs[0]:
+    _, mid_column, _ = st.columns([0.2, 0.6, 0.2])
+    mid_column.dataframe(df_leaderboard)
 # HTML and CSS to create a text box with specified color
         background-color: {background_color};
         color: {text_color};
         padding: 10px;
+        border-radius: 10px;
         ">
         {text}
     </div>
         # Display the detailed dilemma and additional information
         st.markdown(
             colored_text_box(
+                scenario_details["detailed_dilemma"],
+                "#01204E",
+                "white",
             ),
             unsafe_allow_html=True,
         )
         with st.expander("Additional Information"):
+            st.write(
+                {
+                    "LLM Author": scenario_details["llm_author"],
+                    "Problem": scenario_details["problem"],
+                    "Relationship": scenario_details["relationship"],
+                    "Scenario": scenario_details["scenario"],
+                }
+            )
     st.divider()
             # Display the response string
             st.markdown(
                 colored_text_box(
+                    response_details_fixed["response_string"],
+                    "#028391",
+                    "white",
                 ),
                 unsafe_allow_html=True,
             )
             # Display the response string
             st.markdown(
                 colored_text_box(
+                    response_details_dynamic["response_string"],
+                    "#028391",
+                    "white",
                 ),
                 unsafe_allow_html=True,
             )
                 st.markdown(
                     colored_text_box(
                         judging_details_left["judging_response_string"],
+                        "#FEAE6F",
                         "black",
                     ),
                     unsafe_allow_html=True,
                 st.markdown(
                     colored_text_box(
                         judging_details_right["judging_response_string"],
+                        "#FEAE6F",
                         "black",
                     ),
                     unsafe_allow_html=True,
                 st.write("No judging details found for the selected combination.")
 with tabs[2]:
+    # Analysis tab: each figure is opened with PIL and embedded as an inline PNG in a centered <div>.
+    st.markdown("### Battles (Respondent vs. Respondent)")
+    st.write("Expected win rates based on Bradley-Terry coefficients:")
+    image = Image.open("img/llm_vs_llm_win_rates.png")
+    img_base64 = pil_to_base64(image)
+    centered_image_html = f"""
+    <div style="text-align: center;">
+        <img src="data:image/png;base64,{img_base64}" width="1000"/>
+    </div>
+    """
+    st.markdown(centered_image_html, unsafe_allow_html=True)
+    st.markdown("### Affinities (Judge vs. Respondent)")
+    st.write("Raw affinities:")
+    image = Image.open("img/raw.png")
+    img_base64 = pil_to_base64(image)
+    centered_image_html = f"""
+    <div style="text-align: center;">
+        <img src="data:image/png;base64,{img_base64}" width="1000"/>
+    </div>
+    """
+    st.markdown(centered_image_html, unsafe_allow_html=True)
+    st.write("Council-Normalized:")
+    image = Image.open("img/council_normalized.png")
+    img_base64 = pil_to_base64(image)
+    centered_image_html = f"""
+    <div style="text-align: center;">
+        <img src="data:image/png;base64,{img_base64}" width="1000"/>
+    </div>
+    """
+    st.markdown(centered_image_html, unsafe_allow_html=True)
+    st.markdown("### Agreement (Judge vs. Judge)")
+    st.write("Sidewise Cohen's Kappa:")
+    image = Image.open("img/judge_agreement.sidewise_cohen_kappa.png")
+    img_base64 = pil_to_base64(image)
+    centered_image_html = f"""
+    <div style="text-align: center;">
+        <img src="data:image/png;base64,{img_base64}" width="1000"/>
+    </div>
+    """
+    st.markdown(centered_image_html, unsafe_allow_html=True)
+    st.write("Check out the paper for more detailed analysis!")
+with tabs[-1]:
     st.write(
         """
     Please reach out if you are interested in collaborating!

img/council_normalized.png ADDED Viewed

img/judge_agreement.sidewise_cohen_kappa.png ADDED Viewed

img/llm_vs_llm_win_rates.png ADDED Viewed

img/raw.png ADDED Viewed