victormiller
committed on
Commit
•
f29b166
1
Parent(s):
42102b3
Update results.py
Browse files- results.py +41 -1
results.py
CHANGED
@@ -45,8 +45,47 @@ fig.update_layout(
|
|
45 |
|
46 |
Perplexity_Across_Different_Buckets_global_graph = fig
|
47 |
|
|
|
48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
|
|
|
50 |
|
51 |
|
52 |
intro_div = Div(
|
@@ -63,11 +102,13 @@ perp1_div = Div(
|
|
63 |
H3("Perplexity vs Buckets"),
|
64 |
P("For each bucket, we aggregated all the chunks that belong to a single year and calculated the average perplexity for each (bucket, year) data point."),
|
65 |
Img(src="images/prep-diff-buckets-global.png", height = "300", width = "600" ),
|
|
|
66 |
),
|
67 |
Section(
|
68 |
H3("Perplexity vs Years"),
|
69 |
P("Taking the same data, we can convert it into a graph indicating the yearly trend. For most buckets, the average perplexity of dumps from more recent years seem to be lower than that of former years."),
|
70 |
Img(src="images/prep-across-diff-year-global-dup-buckets.png", height = "300", width = "600" ),
|
|
|
71 |
),
|
72 |
Section(
|
73 |
H3("Perplexity vs Document Duplication"),
|
@@ -127,7 +168,6 @@ def results():
|
|
127 |
perp1_div,
|
128 |
llama_div,
|
129 |
P("test plotly"),
|
130 |
-
plotly2fasthtml(Perplexity_Across_Different_Buckets_global_graph),
|
131 |
id="inner-text"
|
132 |
)
|
133 |
)
|
|
|
45 |
|
46 |
Perplexity_Across_Different_Buckets_global_graph = fig
|
47 |
|
48 |
+
import plotly.graph_objects as go
|
49 |
|
50 |
+
# Perplexity per year, one entry each: [year, [bucket labels, perplexity values]]
|
51 |
+
DATA = [
|
52 |
+
["2014", [["1-1", "2-5", "6-10", "11-100", "101-1000", "1001-30000000"], [17.410227605477868, 16.11176217183986, 15.632757662414805, 15.446116676532212, 16.716943171826703, 18.156821563322765]]],
|
53 |
+
["2015", [["1-1", "2-5", "6-10", "11-100", "101-1000", "1001-30000000"], [17.446573602753478, 16.14852530113782, 15.627408549576069, 15.0055028132117, 15.565430373421485, 17.314701050452452]]],
|
54 |
+
["2016", [["1-1", "2-5", "6-10", "11-100", "101-1000", "1001-30000000"], [17.307221780905284, 16.297702171159543, 15.948641884223639, 14.799690714225637, 14.935989931859659, 16.09585768919658]]],
|
55 |
+
["2017", [["1-1", "2-5", "6-10", "11-100", "101-1000", "1001-30000000"], [17.338525603992114, 15.960924352297502, 15.912187993988933, 14.822102470001267, 14.778913482337416, 15.428145290012955]]],
|
56 |
+
["2018", [["1-1", "2-5", "6-10", "11-100", "101-1000", "1001-30000000"], [17.08551151136689, 16.187802102106698, 14.935072408852303, 14.832038213200583, 14.508674264491997, 14.800605964649103]]],
|
57 |
+
["2019", [["1-1", "2-5", "6-10", "11-100", "101-1000", "1001-30000000"], [16.818363305107052, 16.474269837858706, 14.944741674400241, 14.568394784374943, 14.690158822673334, 15.990949424635108]]],
|
58 |
+
["2020", [["1-1", "2-5", "6-10", "11-100", "101-1000", "1001-30000000"], [16.98821894111693, 15.936494557783181, 14.79960386342691, 14.435682562274105, 14.58651834886038, 15.869365567783806]]],
|
59 |
+
["2021", [["1-1", "2-5", "6-10", "11-100", "101-1000", "1001-30000000"], [17.125795647512877, 15.780419457145868, 14.631430892394002, 14.276477514399625, 14.337146941773641, 15.872474774329305]]],
|
60 |
+
["2022", [["1-1", "2-5", "6-10", "11-100", "101-1000", "1001-30000000"], [16.573462144306383, 15.283018703313582, 14.378277745163881, 14.0611924390084, 13.9886330091318, 15.769421394877273]]],
|
61 |
+
["2023", [["1-1", "2-5", "6-10", "11-100", "101-1000", "1001-30000000"], [15.4293630385597, 14.608379914730168, 14.118271697056592, 13.880215644749589, 13.767106666731275, 15.05749135510839]]]
|
62 |
+
]
|
63 |
+
|
64 |
+
# Extract years and ranges (buckets)
|
65 |
+
years = [year_data[0] for year_data in DATA]
|
66 |
+
ranges = DATA[0][1][0]
|
67 |
+
all_values = [year_data[1][1] for year_data in DATA]
|
68 |
+
|
69 |
+
# Create the figure
|
70 |
+
fig = go.Figure()
|
71 |
+
|
72 |
+
# Add a trace for each range (bucket)
|
73 |
+
for i, range_label in enumerate(ranges):
|
74 |
+
values = [year_values[i] for year_values in all_values]
|
75 |
+
fig.add_trace(go.Scatter(x=years, y=values, mode='lines+markers', name=range_label))
|
76 |
+
|
77 |
+
# Update layout
|
78 |
+
fig.update_layout(
|
79 |
+
title="Perplexity over Time by Buckets",
|
80 |
+
xaxis_title="Year",
|
81 |
+
yaxis_title="Perplexity",
|
82 |
+
legend_title="Buckets",
|
83 |
+
hovermode="x unified"
|
84 |
+
)
|
85 |
+
|
86 |
+
# Keep a named reference to the figure so it can be embedded in the page
|
87 |
|
88 |
+
Perplexity_Across_Different_years_graph = fig
|
89 |
|
90 |
|
91 |
intro_div = Div(
|
|
|
102 |
H3("Perplexity vs Buckets"),
|
103 |
P("For each bucket, we aggregated all the chunks that belong to a single year and calculated the average perplexity for each (bucket, year) data point."),
|
104 |
Img(src="images/prep-diff-buckets-global.png", height = "300", width = "600" ),
|
105 |
+
plotly2fasthtml(Perplexity_Across_Different_Buckets_global_graph),
|
106 |
),
|
107 |
Section(
|
108 |
H3("Perplexity vs Years"),
|
109 |
P("Taking the same data, we can convert it into a graph indicating the yearly trend. For most buckets, the average perplexity of dumps from more recent years seem to be lower than that of former years."),
|
110 |
Img(src="images/prep-across-diff-year-global-dup-buckets.png", height = "300", width = "600" ),
|
111 |
+
plotly2fasthtml(Perplexity_Across_Different_years_graph),
|
112 |
),
|
113 |
Section(
|
114 |
H3("Perplexity vs Document Duplication"),
|
|
|
168 |
perp1_div,
|
169 |
llama_div,
|
170 |
P("test plotly"),
|
|
|
171 |
id="inner-text"
|
172 |
)
|
173 |
)
|