victormiller committed on
Commit
f66aa60
1 Parent(s): 0dba356

Update curated.py

Browse files
Files changed (1) hide show
  1. curated.py +36 -26
curated.py CHANGED
@@ -438,6 +438,36 @@ phil_filter = pd.DataFrame(
438
  table_html_phil = phil_filter.to_html(index=False, border=0)
439
  table_div_phil = Div(NotStr(table_html_phil), style="margin: 40px;")
440
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
441
  filtering_process = Div(
442
  Section(
443
  H3("This section contains the specific filtering steps taken for all 14 curated datasets."),
@@ -622,6 +652,11 @@ filtering_process = Div(
622
  Li("Local dedup was done within freelaw itself which removed 90%+ duplicates"),
623
  ),
624
  table_div_freelaw,
 
 
 
 
 
625
  ),
626
  ),
627
  Section(
@@ -920,27 +955,7 @@ def get_data(data_source: str = "Freelaw", doc_id: int = 3, target: str = "foo")
920
 
921
 
922
 
923
- def get_freelaw_data(data_source: str = "Freelaw", doc_id: int = 3, target: str = "foo"):
924
- doc_id = max(0, min(int(doc_id), 9))
925
 
926
- if data_source == "Freelaw":
927
- raw_sample_doc = json.load(open("data/curated_samples/freelaw_raw.json"))
928
- extracted_sample_doc = json.load(
929
- open("data/curated_samples/freelaw_extract.json")
930
- )
931
- else:
932
- raw_sample_doc = extracted_sample_doc = [{} for _ in range(10)]
933
-
934
- raw_json = raw_sample_doc[doc_id]
935
- extracted_json = extracted_sample_doc[doc_id]
936
- return view_data(
937
- raw_json,
938
- extracted_json,
939
- doc_id=doc_id,
940
- data_source=data_source,
941
- data_sources=data_sources,
942
- target=target,
943
- )
944
 
945
 
946
  def update(target: str, request):
@@ -1000,12 +1015,7 @@ fig.update_layout(
1000
  # Show the plot
1001
  diff2_stacked_bar = fig
1002
 
1003
- freelaw_examples = Div(
1004
- Div(
1005
- get_freelaw_data(target=gen_random_id()),
1006
- style="border: 1px solid #ccc; padding: 20px;",
1007
- ),
1008
- )
1009
 
1010
  def curated(request):
1011
 
 
438
  table_html_phil = phil_filter.to_html(index=False, border=0)
439
  table_div_phil = Div(NotStr(table_html_phil), style="margin: 40px;")
440
 
441
+
442
+ def get_freelaw_data(data_source: str = "Freelaw", doc_id: int = 3, target: str = "foo"):
443
+ doc_id = max(0, min(int(doc_id), 9))
444
+
445
+ if data_source == "Freelaw":
446
+ raw_sample_doc = json.load(open("data/curated_samples/freelaw_raw.json"))
447
+ extracted_sample_doc = json.load(
448
+ open("data/curated_samples/freelaw_extract.json")
449
+ )
450
+ else:
451
+ raw_sample_doc = extracted_sample_doc = [{} for _ in range(10)]
452
+
453
+ raw_json = raw_sample_doc[doc_id]
454
+ extracted_json = extracted_sample_doc[doc_id]
455
+ return view_data(
456
+ raw_json,
457
+ extracted_json,
458
+ doc_id=doc_id,
459
+ data_source="Freelaw",
460
+ data_sources=data_sources,
461
+ target=target,
462
+ )
463
+
464
+ freelaw_examples = Div(
465
+ Div(
466
+ get_freelaw_data(target=gen_random_id()),
467
+ style="border: 1px solid #ccc; padding: 20px;",
468
+ ),
469
+ )
470
+
471
  filtering_process = Div(
472
  Section(
473
  H3("This section contains the specific filtering steps taken for all 14 curated datasets."),
 
652
  Li("Local dedup was done within freelaw itself which removed 90%+ duplicates"),
653
  ),
654
  table_div_freelaw,
655
+ Details(
656
+ Summary("FreeLaw Filtering Examples")
657
+ freelaw_examples,
658
+ )
659
+
660
  ),
661
  ),
662
  Section(
 
955
 
956
 
957
 
 
 
958
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
959
 
960
 
961
  def update(target: str, request):
 
1015
  # Show the plot
1016
  diff2_stacked_bar = fig
1017
 
1018
+
 
 
 
 
 
1019
 
1020
  def curated(request):
1021