victormiller committed on
Commit
f44dec9
1 Parent(s): aa54686

Update curated.py

Browse files
Files changed (1) hide show
  1. curated.py +5 -5
curated.py CHANGED
@@ -535,7 +535,7 @@ def get_se_data(data_source: str = "StackExchange", doc_id: int = 3, target: str
535
  target=target,
536
  )
537
 
538
- se_examples = DV2("data/curated_samples/stackexchange_raw.json", "data/curated_samples/stackexchange_extract.json", 0)
539
 
540
  def get_phil_data(data_source: str = "PhilPapers", doc_id: int = 3, target: str = "foo"):
541
  doc_id = max(0, min(int(doc_id), 9))
@@ -558,7 +558,7 @@ def get_phil_data(data_source: str = "PhilPapers", doc_id: int = 3, target: str
558
  target=target,
559
  )
560
 
561
- phil_examples = DV("data/curated_samples/philpapers_raw.json", 0, "PhilPapers")
562
 
563
  def get_arx_data(data_source: str = "Arxiv", doc_id: int = 3, target: str = "foo"):
564
  doc_id = max(0, min(int(doc_id), 9))
@@ -582,7 +582,7 @@ def get_arx_data(data_source: str = "Arxiv", doc_id: int = 3, target: str = "foo
582
  target=target,
583
  )
584
 
585
- arx_examples = DV("data/curated_samples/arxiv_extract.json", 0, "Arxiv")
586
 
587
  def get_S2ORC_data(data_source: str = "S2ORC", doc_id: int = 3, target: str = "foo"):
588
  doc_id = max(0, min(int(doc_id), 9))
@@ -652,7 +652,7 @@ def get_pubmed_data(data_source: str = "Pubmed", doc_id: int = 3, target: str =
652
  target=target,
653
  )
654
 
655
- pubmed_examples = DV("data/curated_samples/pubmed_extract.json", 0, "PubMed")
656
 
657
  def get_dmm_data(data_source: str = "DM Math", doc_id: int = 3, target: str = "foo"):
658
  doc_id = max(0, min(int(doc_id), 9))
@@ -676,7 +676,7 @@ def get_dmm_data(data_source: str = "DM Math", doc_id: int = 3, target: str = "f
676
  target=target,
677
  )
678
 
679
- dmm_examples = DV("data/curated_samples/dm_maths_extract.json", 0, "DM Math")
680
 
681
  def get_pg19_data(data_source: str = "PG19", doc_id: int = 3, target: str = "foo"):
682
  doc_id = max(0, min(int(doc_id), 9))
 
535
  target=target,
536
  )
537
 
538
+ se_examples = DV2("data/curated_samples/stackexchange_raw.json", "data/curated_samples/stackexchange_extract.json", 3)
539
 
540
  def get_phil_data(data_source: str = "PhilPapers", doc_id: int = 3, target: str = "foo"):
541
  doc_id = max(0, min(int(doc_id), 9))
 
558
  target=target,
559
  )
560
 
561
+ phil_examples = DV("data/curated_samples/philpapers_raw.json", 2, "PhilPapers")
562
 
563
  def get_arx_data(data_source: str = "Arxiv", doc_id: int = 3, target: str = "foo"):
564
  doc_id = max(0, min(int(doc_id), 9))
 
582
  target=target,
583
  )
584
 
585
+ arx_examples = DV2("data/curated_samples/arxiv_raw.json", "data/curated_samples/arxiv_extract.json", 3)
586
 
587
  def get_S2ORC_data(data_source: str = "S2ORC", doc_id: int = 3, target: str = "foo"):
588
  doc_id = max(0, min(int(doc_id), 9))
 
652
  target=target,
653
  )
654
 
655
+ pubmed_examples = DV2("data/curated_samples/pubmed_raw.json", "data/curated_samples/pubmed_extract.json", 3)
656
 
657
  def get_dmm_data(data_source: str = "DM Math", doc_id: int = 3, target: str = "foo"):
658
  doc_id = max(0, min(int(doc_id), 9))
 
676
  target=target,
677
  )
678
 
679
+ dmm_examples = DV("data/curated_samples/dm_maths_raw.json", "data/curated_samples/dm_maths_extract.json", 3)
680
 
681
  def get_pg19_data(data_source: str = "PG19", doc_id: int = 3, target: str = "foo"):
682
  doc_id = max(0, min(int(doc_id), 9))