Spaces:
Running
Running
Update.
Browse files
- core/state.py +7 -0
- cypress.config.js +1 -0
- cypress/e2e/createManually.cy.js +1 -1
- cypress/e2e/uploadCsv.cy.js +1 -1
- views/files.py +2 -5
- views/load.py +23 -21
- views/record_sets.py +14 -2
- views/splash.py +17 -4
core/state.py
CHANGED
@@ -20,6 +20,7 @@ from core.constants import OAUTH_CLIENT_SECRET
|
|
20 |
from core.constants import PAST_PROJECTS_PATH
|
21 |
from core.constants import PROJECT_FOLDER_PATTERN
|
22 |
from core.constants import REDIRECT_URI
|
|
|
23 |
import mlcroissant as mlc
|
24 |
|
25 |
|
@@ -257,6 +258,8 @@ class Metadata:
|
|
257 |
del self.distribution[key]
|
258 |
|
259 |
def add_record_set(self, record_set: RecordSet) -> None:
|
|
|
|
|
260 |
self.record_sets.append(record_set)
|
261 |
|
262 |
def remove_record_set(self, key: int) -> None:
|
@@ -323,3 +326,7 @@ class Metadata:
|
|
323 |
distribution=distribution,
|
324 |
record_sets=record_sets,
|
325 |
)
|
|
|
|
|
|
|
|
|
|
20 |
from core.constants import PAST_PROJECTS_PATH
|
21 |
from core.constants import PROJECT_FOLDER_PATTERN
|
22 |
from core.constants import REDIRECT_URI
|
23 |
+
from core.names import find_unique_name
|
24 |
import mlcroissant as mlc
|
25 |
|
26 |
|
|
|
258 |
del self.distribution[key]
|
259 |
|
260 |
def add_record_set(self, record_set: RecordSet) -> None:
    """Rename `record_set` and append it to `self.record_sets`.

    The record set's name is replaced in place by whatever
    `find_unique_name` returns for the names currently in use
    (`self.names()`) and the record set's current name.
    """
    # NOTE(review): presumably this avoids name clashes with existing
    # nodes -- confirm against `core.names.find_unique_name`.
    record_set.name = find_unique_name(self.names(), record_set.name)
    self.record_sets.append(record_set)
|
264 |
|
265 |
def remove_record_set(self, key: int) -> None:
|
|
|
326 |
distribution=distribution,
|
327 |
record_sets=record_sets,
|
328 |
)
|
329 |
+
|
330 |
+
def names(self) -> set[str]:
    """Return the names of all distribution entries and record sets.

    Returns:
        The set of `name` attributes found on every element of
        `self.distribution` followed by `self.record_sets`; duplicates
        collapse into a single entry.
    """
    nodes = self.distribution + self.record_sets
    # Set comprehension: avoids building an intermediate list first.
    return {node.name for node in nodes}
|
cypress.config.js
CHANGED
@@ -3,5 +3,6 @@ const { defineConfig } = require("cypress");
|
|
3 |
module.exports = defineConfig({
|
4 |
// To access content within Streamlit iframes for custom components:
|
5 |
chromeWebSecurity: false,
|
|
|
6 |
e2e: {},
|
7 |
});
|
|
|
3 |
// Cypress settings for the end-to-end suite.
const cypressSettings = {
  // To access content within Streamlit iframes for custom components:
  chromeWebSecurity: false,
  // Default per-command timeout, in milliseconds.
  defaultCommandTimeout: 10000,
  e2e: {},
};

module.exports = defineConfig(cypressSettings);
|
cypress/e2e/createManually.cy.js
CHANGED
@@ -8,7 +8,7 @@ describe('Create a resource manually', () => {
|
|
8 |
it('should allow adding a FileObject resource', () => {
|
9 |
// Streamlit starts on :8501.
|
10 |
cy.visit('http://localhost:8501')
|
11 |
-
cy.get('button'
|
12 |
cy.get('input[aria-label="Name:red[*]"]').type('MyDataset').blur()
|
13 |
cy.get('[data-testid="stMarkdownContainer"]')
|
14 |
.contains('Metadata')
|
|
|
8 |
it('should allow adding a FileObject resource', () => {
|
9 |
// Streamlit starts on :8501.
|
10 |
cy.visit('http://localhost:8501')
|
11 |
+
cy.get('button').contains('Create').click()
|
12 |
cy.get('input[aria-label="Name:red[*]"]').type('MyDataset').blur()
|
13 |
cy.get('[data-testid="stMarkdownContainer"]')
|
14 |
.contains('Metadata')
|
cypress/e2e/uploadCsv.cy.js
CHANGED
@@ -8,7 +8,7 @@ describe('Editor loads a local CSV as a resource', () => {
|
|
8 |
it('should display the form: Overview, Metadata, Resources, & Record Sets', () => {
|
9 |
// Streamlit starts on :8501.
|
10 |
cy.visit('http://localhost:8501')
|
11 |
-
cy.get('button'
|
12 |
|
13 |
cy.get('input[aria-label="Name:red[*]"]').type('MyDataset').blur()
|
14 |
cy.get('[data-testid="stMarkdownContainer"]')
|
|
|
8 |
it('should display the form: Overview, Metadata, Resources, & Record Sets', () => {
|
9 |
// Streamlit starts on :8501.
|
10 |
cy.visit('http://localhost:8501')
|
11 |
+
cy.get('button').contains('Create').click()
|
12 |
|
13 |
cy.get('input[aria-label="Name:red[*]"]').type('MyDataset').blur()
|
14 |
cy.get('[data-testid="stMarkdownContainer"]')
|
views/files.py
CHANGED
@@ -113,11 +113,8 @@ def _render_upload_panel():
|
|
113 |
url = st.session_state[_DISTANT_URL_KEY]
|
114 |
uploaded_file = st.session_state[_LOCAL_FILE_KEY]
|
115 |
file_type = FILE_TYPES[file_type_name]
|
116 |
-
|
117 |
-
|
118 |
-
+ st.session_state[Metadata].record_sets
|
119 |
-
)
|
120 |
-
names = set([node.name for node in nodes])
|
121 |
if url:
|
122 |
file = file_from_url(file_type, url, names)
|
123 |
elif uploaded_file:
|
|
|
113 |
url = st.session_state[_DISTANT_URL_KEY]
|
114 |
uploaded_file = st.session_state[_LOCAL_FILE_KEY]
|
115 |
file_type = FILE_TYPES[file_type_name]
|
116 |
+
metadata: Metadata = st.session_state[Metadata]
|
117 |
+
names = metadata.names()
|
|
|
|
|
|
|
118 |
if url:
|
119 |
file = file_from_url(file_type, url, names)
|
120 |
elif uploaded_file:
|
views/load.py
CHANGED
@@ -3,6 +3,7 @@ import os
|
|
3 |
from etils import epath
|
4 |
import streamlit as st
|
5 |
|
|
|
6 |
from core.past_projects import save_current_project
|
7 |
from core.state import CurrentStep
|
8 |
from core.state import Metadata
|
@@ -10,25 +11,26 @@ import mlcroissant as mlc
|
|
10 |
from utils import jump_to
|
11 |
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
def render_load():
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
newfile_name = (
|
19 |
-
epath.Path("~").expanduser()
|
20 |
-
/ ".cache"
|
21 |
-
/ "croissant"
|
22 |
-
/ "loaded_croissant"
|
23 |
-
)
|
24 |
-
os.makedirs(os.path.dirname(newfile_name), exist_ok=True)
|
25 |
-
with open(newfile_name, mode="wb+") as outfile:
|
26 |
-
outfile.write(file_cont)
|
27 |
-
dataset = mlc.Dataset(newfile_name)
|
28 |
-
st.session_state[Metadata] = Metadata.from_canonical(dataset.metadata)
|
29 |
-
jump_to(CurrentStep.editor)
|
30 |
-
save_current_project()
|
31 |
-
st.rerun()
|
32 |
-
except mlc.ValidationError as e:
|
33 |
-
st.warning(e)
|
34 |
-
st.toast(body="Invalid Croissant File!", icon="🔥")
|
|
|
3 |
from etils import epath
|
4 |
import streamlit as st
|
5 |
|
6 |
+
from core.constants import EDITOR_CACHE
|
7 |
from core.past_projects import save_current_project
|
8 |
from core.state import CurrentStep
|
9 |
from core.state import Metadata
|
|
|
11 |
from utils import jump_to
|
12 |
|
13 |
|
14 |
+
def _on_file_upload(key):
    """Load the file uploaded under `key` as the current Croissant project.

    The uploaded bytes are written to `EDITOR_CACHE / "loaded_croissant"`,
    parsed with `mlc.Dataset`, stored in `st.session_state[Metadata]`, and
    the app jumps to the editor step. A validation failure is reported to
    the user through a warning and a toast instead of being raised.

    Args:
        key: Session-state key of the `st.file_uploader` widget.
    """
    file = st.session_state[key]
    if file is None:
        # The widget value is None when no file is selected (e.g. after the
        # user clears the uploader); there is nothing to load in that case.
        return
    file_cont = file.read()
    # TODO(marcenacp): The Python library should support loading from an open file/dict.
    newfile_name = EDITOR_CACHE / "loaded_croissant"
    os.makedirs(os.path.dirname(newfile_name), exist_ok=True)
    with open(newfile_name, mode="wb+") as outfile:
        outfile.write(file_cont)
    try:
        dataset = mlc.Dataset(newfile_name)
        st.session_state[Metadata] = Metadata.from_canonical(dataset.metadata)
        jump_to(CurrentStep.editor)
        save_current_project()
    except mlc.ValidationError as e:
        st.warning(e)
        st.toast(body="Invalid Croissant File!", icon="🔥")
|
30 |
+
|
31 |
+
|
32 |
def render_load():
    """Render the JSON-LD uploader; `_on_file_upload` runs when its value changes."""
    uploader_key = "json-ld-file-upload"
    st.file_uploader(
        "Select a JSON-LD",
        type="json",
        key=uploader_key,
        on_change=_on_file_upload,
        # The callback looks the uploaded file up in session state by key.
        args=(uploader_key,),
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
views/record_sets.py
CHANGED
@@ -83,6 +83,11 @@ def _find_joins(fields: list[Field]) -> set[Join]:
|
|
83 |
return joins
|
84 |
|
85 |
|
|
|
|
|
|
|
|
|
|
|
86 |
def _handle_fields_change(record_set_key: int, record_set: RecordSet):
|
87 |
data_editor_key = _data_editor_key(record_set_key, record_set)
|
88 |
result = st.session_state[data_editor_key]
|
@@ -148,7 +153,7 @@ def _render_left_panel():
|
|
148 |
record_sets = st.session_state[Metadata].record_sets
|
149 |
record_set: RecordSet
|
150 |
for record_set_key, record_set in enumerate(record_sets):
|
151 |
-
title = f"**{record_set.name}** ({len(record_set.fields)} fields)"
|
152 |
prefix = f"record-set-{record_set_key}"
|
153 |
with st.expander(title, expanded=False):
|
154 |
col1, col2 = st.columns([1, 3])
|
@@ -238,7 +243,8 @@ def _render_left_panel():
|
|
238 |
)
|
239 |
st.data_editor(
|
240 |
fields,
|
241 |
-
|
|
|
242 |
num_rows="dynamic",
|
243 |
key=data_editor_key,
|
244 |
column_config={
|
@@ -269,6 +275,12 @@ def _render_left_panel():
|
|
269 |
on_click=_handle_on_click_field,
|
270 |
args=(record_set_key, record_set),
|
271 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
272 |
|
273 |
|
274 |
def _render_right_panel():
|
|
|
83 |
return joins
|
84 |
|
85 |
|
86 |
+
def _handle_create_record_set():
    """Add a record set named "new-record-set" with an empty description.

    The record set is added through `Metadata.add_record_set` on the
    metadata stored in `st.session_state[Metadata]`.
    """
    current_metadata: Metadata = st.session_state[Metadata]
    blank_record_set = RecordSet(name="new-record-set", description="")
    current_metadata.add_record_set(blank_record_set)
|
89 |
+
|
90 |
+
|
91 |
def _handle_fields_change(record_set_key: int, record_set: RecordSet):
|
92 |
data_editor_key = _data_editor_key(record_set_key, record_set)
|
93 |
result = st.session_state[data_editor_key]
|
|
|
153 |
record_sets = st.session_state[Metadata].record_sets
|
154 |
record_set: RecordSet
|
155 |
for record_set_key, record_set in enumerate(record_sets):
|
156 |
+
title = f"**{record_set.name or '-'}** ({len(record_set.fields)} fields)"
|
157 |
prefix = f"record-set-{record_set_key}"
|
158 |
with st.expander(title, expanded=False):
|
159 |
col1, col2 = st.columns([1, 3])
|
|
|
243 |
)
|
244 |
st.data_editor(
|
245 |
fields,
|
246 |
+
# There is a bug with `st.data_editor` when the df is empty.
|
247 |
+
use_container_width=not fields.empty,
|
248 |
num_rows="dynamic",
|
249 |
key=data_editor_key,
|
250 |
column_config={
|
|
|
275 |
on_click=_handle_on_click_field,
|
276 |
args=(record_set_key, record_set),
|
277 |
)
|
278 |
+
st.button(
|
279 |
+
"Create a new RecordSet",
|
280 |
+
key=f"create-new-record-set",
|
281 |
+
type="primary",
|
282 |
+
on_click=_handle_create_record_set,
|
283 |
+
)
|
284 |
|
285 |
|
286 |
def _render_right_panel():
|
views/splash.py
CHANGED
@@ -40,8 +40,8 @@ def render_splash():
|
|
40 |
)
|
41 |
with st.expander("**Try out an example!**", expanded=True):
|
42 |
|
43 |
-
def create_example():
|
44 |
-
url = "https://raw.githubusercontent.com/mlcommons/croissant/main/datasets/
|
45 |
try:
|
46 |
json = requests.get(url).json()
|
47 |
metadata = mlc.Metadata.from_json(mlc.Issues(), json, None)
|
@@ -50,16 +50,29 @@ def render_splash():
|
|
50 |
jump_to(CurrentStep.editor)
|
51 |
except Exception as exception:
|
52 |
logging.error(exception)
|
53 |
-
st.
|
54 |
"Sorry, it seems that the example is broken... Can you please"
|
55 |
" [open an issue on"
|
56 |
" GitHub](https://github.com/mlcommons/croissant/issues/new)?"
|
57 |
)
|
58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
st.button(
|
60 |
-
"
|
61 |
on_click=create_example,
|
62 |
type="primary",
|
|
|
63 |
)
|
64 |
with col2:
|
65 |
with st.expander("**Past projects**", expanded=True):
|
|
|
40 |
)
|
41 |
with st.expander("**Try out an example!**", expanded=True):
|
42 |
|
43 |
+
def create_example(dataset: str):
|
44 |
+
url = f"https://raw.githubusercontent.com/mlcommons/croissant/main/datasets/{dataset.lower()}/metadata.json"
|
45 |
try:
|
46 |
json = requests.get(url).json()
|
47 |
metadata = mlc.Metadata.from_json(mlc.Issues(), json, None)
|
|
|
50 |
jump_to(CurrentStep.editor)
|
51 |
except Exception as exception:
|
52 |
logging.error(exception)
|
53 |
+
st.error(
|
54 |
"Sorry, it seems that the example is broken... Can you please"
|
55 |
" [open an issue on"
|
56 |
" GitHub](https://github.com/mlcommons/croissant/issues/new)?"
|
57 |
)
|
58 |
|
59 |
+
dataset = st.selectbox(
|
60 |
+
label="Dataset",
|
61 |
+
options=[
|
62 |
+
"Titanic",
|
63 |
+
"FLORES-200",
|
64 |
+
"GPT-3",
|
65 |
+
"COCO2014",
|
66 |
+
"PASS",
|
67 |
+
"MovieLens",
|
68 |
+
"Bigcode-The-Stack",
|
69 |
+
],
|
70 |
+
)
|
71 |
st.button(
|
72 |
+
f"{dataset} dataset",
|
73 |
on_click=create_example,
|
74 |
type="primary",
|
75 |
+
args=(dataset,),
|
76 |
)
|
77 |
with col2:
|
78 |
with st.expander("**Past projects**", expanded=True):
|