victormiller committed on
Commit
ce3069e
1 Parent(s): 41451ff

Update results.py

Browse files
Files changed (1) hide show
  1. results.py +59 -3
results.py CHANGED
@@ -416,6 +416,62 @@ fig.update_layout(
416
  # Show the figure
417
  llama_graph2 = fig
418
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
419
  intro_div = Div(
420
  H2("Perplexity Evaluation on Duplicate Data"),
421
  H3("Model based Quality Estimation"),
@@ -483,17 +539,17 @@ llama_div = Div(
483
  Section(
484
  H3("Perplexity vs Dump Duplication"),
485
  Img(src="images/prep-vs-dump-dup-global.png", height = "300", width = "600" ),
486
- #plotly2fasthtml(llama_graph1),
487
  ),
488
  Section(
489
  H3("Perplexity vs Local Buckets"),
490
  Img(src="images/prep-diff-buckets-local.png", height = "300", width = "600" ),
491
- #plotly2fasthtml(llama_graph1),
492
  ),
493
  Section(
494
  H3("Perplexity vs Local Dump Duplication"),
495
  Img(src="images/prep-vs-dump-dup-global.png", height = "300", width = "600" ),
496
- #plotly2fasthtml(llama_graph1),
497
  ),
498
  )
499
 
 
416
  # Show the figure
417
  llama_graph2 = fig
418
 
419
+
420
+ #llama graph 3
421
+ #tbd
422
+ #llama graph 4
423
+
424
+
425
+ # Data for different buckets and years
426
+ data = {
427
+ "1-1": {
428
+ "years": ["2014", "2015", "2016", "2017", "2018", "2019", "2020", "2021", "2022", "2023"],
429
+ "perplexities": [10.036774097041135, 9.46310273785878, 9.41413706166537, 9.50318602661455, 9.007669062339426, 8.388255660116407, 10.112246017864624, 10.239269162661959, 9.931951075969451, 8.646614152066428]
430
+ },
431
+ "2-5": {
432
+ "years": ["2014", "2015", "2016", "2017", "2018", "2019", "2020", "2021", "2022", "2023"],
433
+ "perplexities": [9.306693996275795, 8.763464863196129, 8.645126825996691, 9.473904977192573, 10.95829859145081, 10.676105294328789, 10.255251179892559, 9.54987953569235, 9.12737570591033, 8.806922449908505]
434
+ },
435
+ "6-10": {
436
+ "years": ["2014", "2015", "2016", "2017", "2018", "2019", "2020", "2021", "2022", "2023"],
437
+ "perplexities": [9.442327622499175, 9.075851726027564, 9.527148465147846, 9.755998086072951, 10.128151243953157, 9.728353939624842, 9.233548505479437, 9.067380903629866, 8.995868137602248, 8.816629232137835]
438
+ },
439
+ "11-100": {
440
+ "years": ["2014", "2015", "2016", "2017", "2018", "2019", "2020", "2021", "2022", "2023"],
441
+ "perplexities": [9.015408185880002, 8.868392446242012, 9.120345162203675, 8.968012141869462, 9.451949410987668, 9.381837094065533, 9.25131862646364, 9.014261939731549, 9.00805668763514, 8.995152677487027]
442
+ },
443
+ "101-1000": {
444
+ "years": ["2014", "2015", "2016", "2017", "2018", "2019", "2020", "2021", "2022", "2023"],
445
+ "perplexities": [9.94583162297666, 9.113560631617027, 8.9228845723255, 8.895860780054043, 8.863879736723902, 8.401723232809463, 8.458532176757009, 8.14345667720481, 7.882044010499616, 7.737747701620713]
446
+ },
447
+ "1001-30000000": {
448
+ "years": ["2014", "2015", "2016", "2017", "2018", "2019", "2020", "2021", "2022", "2023"],
449
+ "perplexities": [9.141712571508352, 8.037411460181893, 9.14052983061081, 8.757970647106037, 8.440366034517687, 7.5705604983353325, 7.4808205167223525, 7.312019290288715, 7.538858258386088, 6.77703951001925]
450
+ }
451
+ }
452
+
453
+ # Create figure
454
+ fig = go.Figure()
455
+
456
+ # Add traces for each bucket
457
+ for bucket, bucket_data in data.items():
458
+ fig.add_trace(go.Scatter(x=bucket_data["years"], y=bucket_data["perplexities"], mode='lines+markers', name=bucket))
459
+
460
+ # Update layout
461
+ fig.update_layout(
462
+ title="Perplexity Across Different Years for Various Buckets (Global)",
463
+ xaxis_title="Year",
464
+ yaxis_title="Average Perplexity",
465
+ legend_title="Bucket (Duplicate Count Range)"
466
+ )
467
+
468
+ # Keep a reference to the figure so it can be embedded with plotly2fasthtml
469
+ llama_graph4 = fig
470
+
471
+ ##llama graph 5
472
+
473
+
474
+
475
  intro_div = Div(
476
  H2("Perplexity Evaluation on Duplicate Data"),
477
  H3("Model based Quality Estimation"),
 
539
  Section(
540
  H3("Perplexity vs Dump Duplication"),
541
  Img(src="images/prep-vs-dump-dup-global.png", height = "300", width = "600" ),
542
+ #plotly2fasthtml(llama_graph3),
543
  ),
544
  Section(
545
  H3("Perplexity vs Local Buckets"),
546
  Img(src="images/prep-diff-buckets-local.png", height = "300", width = "600" ),
547
+ plotly2fasthtml(llama_graph4),
548
  ),
549
  Section(
550
  H3("Perplexity vs Local Dump Duplication"),
551
  Img(src="images/prep-vs-dump-dup-global.png", height = "300", width = "600" ),
552
+ #plotly2fasthtml(llama_graph5),
553
  ),
554
  )
555