omkarenator committed on
Commit
b6c56e9
1 Parent(s): 7e8dbcd

add more data sources

Browse files
Files changed (2) hide show
  1. curated.py +19 -1
  2. main.py +3 -3
curated.py CHANGED
@@ -9,7 +9,7 @@ data_sources = [
9
  "Arxiv",
10
  "S2ORC",
11
  "S2ORC Abstract",
12
- "Pubmeds",
13
  "USPTO",
14
  "Hackernews",
15
  "Ubuntu IRC",
@@ -54,6 +54,24 @@ def get_data(data_source: str = "Freelaw", doc_id: int = 3):
54
  raw_sample_doc = extracted_sample_doc = json.load(
55
  open("data/curated_samples/s2orc_abstract_raw.json")
56
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  else:
58
  raw_sample_doc = extracted_sample_doc = [{} for _ in range(10)]
59
 
 
9
  "Arxiv",
10
  "S2ORC",
11
  "S2ORC Abstract",
12
+ "Pubmed",
13
  "USPTO",
14
  "Hackernews",
15
  "Ubuntu IRC",
 
54
  raw_sample_doc = extracted_sample_doc = json.load(
55
  open("data/curated_samples/s2orc_abstract_raw.json")
56
  )
57
+ elif data_source == "Pubmed":
58
+ raw_sample_doc = json.load(open("data/curated_samples/pubmed_raw.json"))
59
+ extracted_sample_doc = json.load(
60
+ open("data/curated_samples/pubmed_extract.json")
61
+ )
62
+ elif data_source == "DM Maths":
63
+ raw_sample_doc = json.load(open("data/curated_samples/dm_maths_raw.json"))
64
+ extracted_sample_doc = json.load(
65
+ open("data/curated_samples/dm_maths_extract.json")
66
+ )
67
+ elif data_source == "PG19":
68
+ raw_sample_doc = extracted_sample_doc = json.load(
69
+ open("data/curated_samples/pg19_raw.json")
70
+ )
71
+ elif data_source == "Europarl":
72
+ raw_sample_doc = extracted_sample_doc = json.load(
73
+ open("data/curated_samples/europarl_raw.json")
74
+ )
75
  else:
76
  raw_sample_doc = extracted_sample_doc = [{} for _ in range(10)]
77
 
main.py CHANGED
@@ -357,11 +357,11 @@ def curated(request):
357
 
358
  return Div(
359
  Section(
360
- H1("Curated Sources"),
361
  plotly2fasthtml(get_chart_28168342()),
362
- H2("Data Preparation"),
363
  table_div,
364
- H2("Data Preprocessing"),
365
  expander,
366
  id="inner-text",
367
  )
 
357
 
358
  return Div(
359
  Section(
360
+ H2("Curated Sources"),
361
  plotly2fasthtml(get_chart_28168342()),
362
+ H3("Data Preparation"),
363
  table_div,
364
+ H3("Data Preprocessing"),
365
  expander,
366
  id="inner-text",
367
  )