Spaces:
Sleeping
Sleeping
omkarenator
committed on
Commit
•
b6c56e9
1
Parent(s):
7e8dbcd
add more data sources
Browse files- curated.py +19 -1
- main.py +3 -3
curated.py
CHANGED
@@ -9,7 +9,7 @@ data_sources = [
|
|
9 |
"Arxiv",
|
10 |
"S2ORC",
|
11 |
"S2ORC Abstract",
|
12 |
-
"
|
13 |
"USPTO",
|
14 |
"Hackernews",
|
15 |
"Ubuntu IRC",
|
@@ -54,6 +54,24 @@ def get_data(data_source: str = "Freelaw", doc_id: int = 3):
|
|
54 |
raw_sample_doc = extracted_sample_doc = json.load(
|
55 |
open("data/curated_samples/s2orc_abstract_raw.json")
|
56 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
else:
|
58 |
raw_sample_doc = extracted_sample_doc = [{} for _ in range(10)]
|
59 |
|
|
|
9 |
"Arxiv",
|
10 |
"S2ORC",
|
11 |
"S2ORC Abstract",
|
12 |
+
"Pubmed",
|
13 |
"USPTO",
|
14 |
"Hackernews",
|
15 |
"Ubuntu IRC",
|
|
|
54 |
raw_sample_doc = extracted_sample_doc = json.load(
|
55 |
open("data/curated_samples/s2orc_abstract_raw.json")
|
56 |
)
|
57 |
+
elif data_source == "Pubmed":
|
58 |
+
raw_sample_doc = json.load(open("data/curated_samples/pubmed_raw.json"))
|
59 |
+
extracted_sample_doc = json.load(
|
60 |
+
open("data/curated_samples/pubmed_extract.json")
|
61 |
+
)
|
62 |
+
elif data_source == "DM Maths":
|
63 |
+
raw_sample_doc = json.load(open("data/curated_samples/dm_maths_raw.json"))
|
64 |
+
extracted_sample_doc = json.load(
|
65 |
+
open("data/curated_samples/dm_maths_extract.json")
|
66 |
+
)
|
67 |
+
elif data_source == "PG19":
|
68 |
+
raw_sample_doc = extracted_sample_doc = json.load(
|
69 |
+
open("data/curated_samples/pg19_raw.json")
|
70 |
+
)
|
71 |
+
elif data_source == "Europarl":
|
72 |
+
raw_sample_doc = extracted_sample_doc = json.load(
|
73 |
+
open("data/curated_samples/europarl_raw.json")
|
74 |
+
)
|
75 |
else:
|
76 |
raw_sample_doc = extracted_sample_doc = [{} for _ in range(10)]
|
77 |
|
main.py
CHANGED
@@ -357,11 +357,11 @@ def curated(request):
|
|
357 |
|
358 |
return Div(
|
359 |
Section(
|
360 |
-
|
361 |
plotly2fasthtml(get_chart_28168342()),
|
362 |
-
|
363 |
table_div,
|
364 |
-
|
365 |
expander,
|
366 |
id="inner-text",
|
367 |
)
|
|
|
357 |
|
358 |
return Div(
|
359 |
Section(
|
360 |
+
H2("Curated Sources"),
|
361 |
plotly2fasthtml(get_chart_28168342()),
|
362 |
+
H3("Data Preparation"),
|
363 |
table_div,
|
364 |
+
H3("Data Preprocessing"),
|
365 |
expander,
|
366 |
id="inner-text",
|
367 |
)
|