victormiller
committed on
Commit
•
4028499
1
Parent(s):
9f87a47
Update curated.py
Browse files - curated.py +1 -0
curated.py
CHANGED
@@ -7,6 +7,7 @@ import json
|
|
7 |
from data_viewer import view_data, gen_random_id
|
8 |
from rich import print
|
9 |
import uuid
|
|
|
10 |
|
11 |
overview_text = P("Curated sources comprise high-quality datasets that contain domain-specificity. These sources, such as Arxiv, Wikipedia, and Stack Exchange, provide valuable data that is excluded from the web dataset mentioned above. Analyzing and processing non-web data can yield insights and opportunities for various applications. Details about each of the sources are provided below. ")
|
12 |
copyright_disclaimer = P("We respect the copyright of the data sources and have not included the controversial data that was used in Pile like YouTube and Opensubtitles, Reddit threads, and books.")
|
|
|
7 |
from data_viewer import view_data, gen_random_id
|
8 |
from rich import print
|
9 |
import uuid
|
10 |
+
import plotly.express as px
|
11 |
|
12 |
overview_text = P("Curated sources comprise high-quality datasets that contain domain-specificity. These sources, such as Arxiv, Wikipedia, and Stack Exchange, provide valuable data that is excluded from the web dataset mentioned above. Analyzing and processing non-web data can yield insights and opportunities for various applications. Details about each of the sources are provided below. ")
|
13 |
copyright_disclaimer = P("We respect the copyright of the data sources and have not included the controversial data that was used in Pile like YouTube and Opensubtitles, Reddit threads, and books.")
|