victormiller committed on
Commit
1064591
1 Parent(s): 9b94300

Update results.py

Browse files
Files changed (1) hide show
  1. results.py +45 -0
results.py CHANGED
@@ -342,6 +342,45 @@ fig.update_layout(
342
  # Show figure
343
  graph6 = fig
344
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
345
 
346
  intro_div = Div(
347
  H2("Perplexity Evaluation on Duplicate Data"),
@@ -396,26 +435,32 @@ llama_div = Div(
396
  Section(
397
  H2("Llama 3.1 8B"),
398
  P("For comparison purpose, we run the same perplexity evaluation with llama 3.1 8B model.")
 
399
  ),
400
  Section(
401
  H3("Perplexity vs Buckets"),
402
  Img(src="images/perp-across-diff-buckets-global.png", height = "300", width = "600" ),
 
403
  ),
404
  Section(
405
  H3("Perplexity vs Years"),
406
  Img(src="images/prep-across-diff-years-global.png", height = "300", width = "600" ),
 
407
  ),
408
  Section(
409
  H3("Perplexity vs Dump Duplication"),
410
  Img(src="images/prep-vs-dump-dup-global.png", height = "300", width = "600" ),
 
411
  ),
412
  Section(
413
  H3("Perplexity vs Local Buckets"),
414
  Img(src="images/prep-diff-buckets-local.png", height = "300", width = "600" ),
 
415
  ),
416
  Section(
417
  H3("Perplexity vs Local Dump Duplication"),
418
  Img(src="images/prep-vs-dump-dup-global.png", height = "300", width = "600" ),
 
419
  ),
420
  )
421
 
 
342
  # Show figure
343
  graph6 = fig
344
 
345
# Llama graph 1: average perplexity per duplicate-count bucket, one line per year.
# The finished figure is stored in `llama_graph1`, which the page layout below
# embeds via plotly2fasthtml(llama_graph1).

import plotly.graph_objects as go

# X-axis categories: duplicate-count ranges, ordered from fewest to most duplicates.
buckets = ["1-1", "2-5", "6-10", "11-100", "101-1000", "1001-30000000"]

# Year -> perplexity values, one value per entry of `buckets` (same order).
data = {
    "2014": [10.036774097041135, 9.306693996275795, 9.442327622499175, 9.015408185880002, 9.94583162297666, 9.141712571508352],
    "2015": [9.46310273785878, 8.763464863196129, 9.075851726027564, 8.868392446242012, 9.113560631617027, 8.037411460181893],
    "2016": [9.41413706166537, 8.645126825996691, 9.527148465147846, 9.120345162203675, 8.9228845723255, 9.14052983061081],
    "2017": [9.50318602661455, 9.473904977192573, 9.755998086072951, 8.968012141869462, 8.895860780054043, 8.757970647106037],
    "2018": [9.007669062339426, 10.95829859145081, 10.128151243953157, 9.451949410987668, 8.863879736723902, 8.440366034517687],
    "2019": [8.388255660116407, 10.676105294328789, 9.728353939624842, 9.381837094065533, 8.401723232809463, 7.5705604983353325],
    "2020": [10.112246017864624, 10.255251179892559, 9.233548505479437, 9.25131862646364, 8.458532176757009, 7.4808205167223525],
    "2021": [10.239269162661959, 9.54987953569235, 9.067380903629866, 9.014261939731549, 8.14345667720481, 7.312019290288715],
    "2022": [9.931951075969451, 9.12737570591033, 8.995868137602248, 9.00805668763514, 7.882044010499616, 7.538858258386088],
    "2023": [8.646614152066428, 8.806922449908505, 8.816629232137835, 8.995152677487027, 7.737747701620713, 6.77703951001925],
}

# Start from an empty figure and add one line-with-markers trace per year,
# in the insertion order of `data`.
fig = go.Figure()
for year, perplexities in data.items():
    fig.add_trace(
        go.Scatter(
            x=buckets,
            y=perplexities,
            mode='lines+markers',
            name=year,
        )
    )

# Chart title, axis labels and legend heading.
fig.update_layout(
    title="Perplexity Across Different Buckets (Global)",
    xaxis_title="Bucket (duplicate count range)",
    yaxis_title="Average Perplexity",
    legend_title="Year",
)

# Keep a dedicated handle to this figure; nothing is displayed here.
llama_graph1 = fig
383
+
384
 
385
  intro_div = Div(
386
  H2("Perplexity Evaluation on Duplicate Data"),
 
435
  Section(
436
  H2("Llama 3.1 8B"),
437
  P("For comparison purpose, we run the same perplexity evaluation with llama 3.1 8B model.")
438
+ plotly2fasthtml(llama_graph1),
439
  ),
440
  Section(
441
  H3("Perplexity vs Buckets"),
442
  Img(src="images/perp-across-diff-buckets-global.png", height = "300", width = "600" ),
443
+ #plotly2fasthtml(llama_graph1),
444
  ),
445
  Section(
446
  H3("Perplexity vs Years"),
447
  Img(src="images/prep-across-diff-years-global.png", height = "300", width = "600" ),
448
+ #plotly2fasthtml(llama_graph1),
449
  ),
450
  Section(
451
  H3("Perplexity vs Dump Duplication"),
452
  Img(src="images/prep-vs-dump-dup-global.png", height = "300", width = "600" ),
453
+ #plotly2fasthtml(llama_graph1),
454
  ),
455
  Section(
456
  H3("Perplexity vs Local Buckets"),
457
  Img(src="images/prep-diff-buckets-local.png", height = "300", width = "600" ),
458
+ #plotly2fasthtml(llama_graph1),
459
  ),
460
  Section(
461
  H3("Perplexity vs Local Dump Duplication"),
462
  Img(src="images/prep-vs-dump-dup-global.png", height = "300", width = "600" ),
463
+ #plotly2fasthtml(llama_graph1),
464
  ),
465
  )
466