"""Streamlit dashboard for browsing best/worst query evaluation results.

Reads the precomputed per-query result CSVs (paths come from ``config``)
and renders two optional sections, toggled from the sidebar:

* per-query best/worst tables, sortable by any column;
* per-area aggregates (mean of the numeric columns), also sortable.
"""
import pandas as pd
import streamlit as st

import config

# Precomputed evaluation results, one row per query.
best_results_df = pd.read_csv(config.best_tasks_path)
worst_results_df = pd.read_csv(config.worst_tasks_path)

show_worst_best_statistics = st.sidebar.checkbox(
    label="show worst/best statistics grouped by area"
)
show_area_aggregated_results = st.sidebar.checkbox(
    label="show results aggregated by area"
)

if show_worst_best_statistics:
    st.markdown(
        """
## Worst/best queries
The following are top 10 worst/best queries per area by number of hits.
There are at least 10 documents per query in the test set, so number of hits/10 is the accuracy.
"""
    )
    # key= is required: the other section renders a selectbox with the same
    # "sort by" label, and identical label + no key makes Streamlit raise a
    # DuplicateWidgetID error when both checkboxes are checked.
    sort_key = st.selectbox(
        "sort by", list(best_results_df.columns), key="sort_by_queries"
    )
    st.markdown("## Queries with best results")
    st.table(best_results_df.sort_values(sort_key, ascending=False))
    st.markdown("## Queries with worst results")
    st.table(worst_results_df.sort_values(sort_key, ascending=False))

if show_area_aggregated_results:
    st.markdown("## Area aggregated results")
    # numeric_only=True: pandas >= 2.0 raises TypeError when a groupby mean
    # hits non-numeric columns (e.g. the query text); older pandas dropped
    # them silently, which this flag reproduces explicitly.
    best_results_agg = (
        best_results_df.groupby("area").mean(numeric_only=True).reset_index()
    )
    worst_results_agg = (
        worst_results_df.groupby("area").mean(numeric_only=True).reset_index()
    )
    # Distinct key for the same reason as the per-query selectbox above.
    sort_key = st.selectbox(
        "sort by", list(best_results_agg.columns), key="sort_by_areas"
    )
    st.markdown("Best results")
    st.table(best_results_agg.sort_values(sort_key, ascending=False))
    st.markdown("Worst results")
    st.table(worst_results_agg.sort_values(sort_key, ascending=False))