marcenacp committed on
Commit
0c5b67f
1 Parent(s): 5a782ad
core/state.py CHANGED
@@ -20,6 +20,7 @@ from core.constants import OAUTH_CLIENT_SECRET
20
  from core.constants import PAST_PROJECTS_PATH
21
  from core.constants import PROJECT_FOLDER_PATTERN
22
  from core.constants import REDIRECT_URI
 
23
  import mlcroissant as mlc
24
 
25
 
@@ -257,6 +258,8 @@ class Metadata:
257
  del self.distribution[key]
258
 
259
  def add_record_set(self, record_set: RecordSet) -> None:
 
 
260
  self.record_sets.append(record_set)
261
 
262
  def remove_record_set(self, key: int) -> None:
@@ -323,3 +326,7 @@ class Metadata:
323
  distribution=distribution,
324
  record_sets=record_sets,
325
  )
 
 
 
 
 
20
  from core.constants import PAST_PROJECTS_PATH
21
  from core.constants import PROJECT_FOLDER_PATTERN
22
  from core.constants import REDIRECT_URI
23
+ from core.names import find_unique_name
24
  import mlcroissant as mlc
25
 
26
 
 
258
  del self.distribution[key]
259
 
260
  def add_record_set(self, record_set: RecordSet) -> None:
261
+ name = find_unique_name(self.names(), record_set.name)
262
+ record_set.name = name
263
  self.record_sets.append(record_set)
264
 
265
  def remove_record_set(self, key: int) -> None:
 
326
  distribution=distribution,
327
  record_sets=record_sets,
328
  )
329
+
330
+ def names(self) -> set[str]:
331
+ nodes = self.distribution + self.record_sets
332
+ return set([node.name for node in nodes])
cypress.config.js CHANGED
@@ -3,5 +3,6 @@ const { defineConfig } = require("cypress");
3
  module.exports = defineConfig({
4
  // To access content within Streamlit iframes for custom components:
5
  chromeWebSecurity: false,
 
6
  e2e: {},
7
  });
 
3
  module.exports = defineConfig({
4
  // To access content within Streamlit iframes for custom components:
5
  chromeWebSecurity: false,
6
+ defaultCommandTimeout: 10000,
7
  e2e: {},
8
  });
cypress/e2e/createManually.cy.js CHANGED
@@ -8,7 +8,7 @@ describe('Create a resource manually', () => {
8
  it('should allow adding a FileObject resource', () => {
9
  // Streamlit starts on :8501.
10
  cy.visit('http://localhost:8501')
11
- cy.get('button', {timeout: 10000}).contains('Create', {timeout: 10000}).click()
12
  cy.get('input[aria-label="Name:red[*]"]').type('MyDataset').blur()
13
  cy.get('[data-testid="stMarkdownContainer"]')
14
  .contains('Metadata')
 
8
  it('should allow adding a FileObject resource', () => {
9
  // Streamlit starts on :8501.
10
  cy.visit('http://localhost:8501')
11
+ cy.get('button').contains('Create').click()
12
  cy.get('input[aria-label="Name:red[*]"]').type('MyDataset').blur()
13
  cy.get('[data-testid="stMarkdownContainer"]')
14
  .contains('Metadata')
cypress/e2e/uploadCsv.cy.js CHANGED
@@ -8,7 +8,7 @@ describe('Editor loads a local CSV as a resource', () => {
8
  it('should display the form: Overview, Metadata, Resources, & Record Sets', () => {
9
  // Streamlit starts on :8501.
10
  cy.visit('http://localhost:8501')
11
- cy.get('button', {timeout: 10000}).contains('Create', {timeout: 10000}).click()
12
 
13
  cy.get('input[aria-label="Name:red[*]"]').type('MyDataset').blur()
14
  cy.get('[data-testid="stMarkdownContainer"]')
 
8
  it('should display the form: Overview, Metadata, Resources, & Record Sets', () => {
9
  // Streamlit starts on :8501.
10
  cy.visit('http://localhost:8501')
11
+ cy.get('button').contains('Create').click()
12
 
13
  cy.get('input[aria-label="Name:red[*]"]').type('MyDataset').blur()
14
  cy.get('[data-testid="stMarkdownContainer"]')
views/files.py CHANGED
@@ -113,11 +113,8 @@ def _render_upload_panel():
113
  url = st.session_state[_DISTANT_URL_KEY]
114
  uploaded_file = st.session_state[_LOCAL_FILE_KEY]
115
  file_type = FILE_TYPES[file_type_name]
116
- nodes = (
117
- st.session_state[Metadata].distribution
118
- + st.session_state[Metadata].record_sets
119
- )
120
- names = set([node.name for node in nodes])
121
  if url:
122
  file = file_from_url(file_type, url, names)
123
  elif uploaded_file:
 
113
  url = st.session_state[_DISTANT_URL_KEY]
114
  uploaded_file = st.session_state[_LOCAL_FILE_KEY]
115
  file_type = FILE_TYPES[file_type_name]
116
+ metadata: Metadata = st.session_state[Metadata]
117
+ names = metadata.names()
 
 
 
118
  if url:
119
  file = file_from_url(file_type, url, names)
120
  elif uploaded_file:
views/load.py CHANGED
@@ -3,6 +3,7 @@ import os
3
  from etils import epath
4
  import streamlit as st
5
 
 
6
  from core.past_projects import save_current_project
7
  from core.state import CurrentStep
8
  from core.state import Metadata
@@ -10,25 +11,26 @@ import mlcroissant as mlc
10
  from utils import jump_to
11
 
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  def render_load():
14
- file = st.file_uploader("Select a JSON-LD", type="json")
15
- if file is not None:
16
- try:
17
- file_cont = file.read()
18
- newfile_name = (
19
- epath.Path("~").expanduser()
20
- / ".cache"
21
- / "croissant"
22
- / "loaded_croissant"
23
- )
24
- os.makedirs(os.path.dirname(newfile_name), exist_ok=True)
25
- with open(newfile_name, mode="wb+") as outfile:
26
- outfile.write(file_cont)
27
- dataset = mlc.Dataset(newfile_name)
28
- st.session_state[Metadata] = Metadata.from_canonical(dataset.metadata)
29
- jump_to(CurrentStep.editor)
30
- save_current_project()
31
- st.rerun()
32
- except mlc.ValidationError as e:
33
- st.warning(e)
34
- st.toast(body="Invalid Croissant File!", icon="🔥")
 
3
  from etils import epath
4
  import streamlit as st
5
 
6
+ from core.constants import EDITOR_CACHE
7
  from core.past_projects import save_current_project
8
  from core.state import CurrentStep
9
  from core.state import Metadata
 
11
  from utils import jump_to
12
 
13
 
14
+ def _on_file_upload(key):
15
+ file = st.session_state[key]
16
+ file_cont = file.read()
17
+ # TODO(marcenacp): The Python library should support loading from an open file/dict.
18
+ newfile_name = EDITOR_CACHE / "loaded_croissant"
19
+ os.makedirs(os.path.dirname(newfile_name), exist_ok=True)
20
+ with open(newfile_name, mode="wb+") as outfile:
21
+ outfile.write(file_cont)
22
+ try:
23
+ dataset = mlc.Dataset(newfile_name)
24
+ st.session_state[Metadata] = Metadata.from_canonical(dataset.metadata)
25
+ jump_to(CurrentStep.editor)
26
+ save_current_project()
27
+ except mlc.ValidationError as e:
28
+ st.warning(e)
29
+ st.toast(body="Invalid Croissant File!", icon="🔥")
30
+
31
+
32
  def render_load():
33
+ key = "json-ld-file-upload"
34
+ st.file_uploader(
35
+ "Select a JSON-LD", type="json", key=key, on_change=_on_file_upload, args=(key,)
36
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
views/record_sets.py CHANGED
@@ -83,6 +83,11 @@ def _find_joins(fields: list[Field]) -> set[Join]:
83
  return joins
84
 
85
 
 
 
 
 
 
86
  def _handle_fields_change(record_set_key: int, record_set: RecordSet):
87
  data_editor_key = _data_editor_key(record_set_key, record_set)
88
  result = st.session_state[data_editor_key]
@@ -148,7 +153,7 @@ def _render_left_panel():
148
  record_sets = st.session_state[Metadata].record_sets
149
  record_set: RecordSet
150
  for record_set_key, record_set in enumerate(record_sets):
151
- title = f"**{record_set.name}** ({len(record_set.fields)} fields)"
152
  prefix = f"record-set-{record_set_key}"
153
  with st.expander(title, expanded=False):
154
  col1, col2 = st.columns([1, 3])
@@ -238,7 +243,8 @@ def _render_left_panel():
238
  )
239
  st.data_editor(
240
  fields,
241
- use_container_width=True,
 
242
  num_rows="dynamic",
243
  key=data_editor_key,
244
  column_config={
@@ -269,6 +275,12 @@ def _render_left_panel():
269
  on_click=_handle_on_click_field,
270
  args=(record_set_key, record_set),
271
  )
 
 
 
 
 
 
272
 
273
 
274
  def _render_right_panel():
 
83
  return joins
84
 
85
 
86
+ def _handle_create_record_set():
87
+ metadata: Metadata = st.session_state[Metadata]
88
+ metadata.add_record_set(RecordSet(name="new-record-set", description=""))
89
+
90
+
91
  def _handle_fields_change(record_set_key: int, record_set: RecordSet):
92
  data_editor_key = _data_editor_key(record_set_key, record_set)
93
  result = st.session_state[data_editor_key]
 
153
  record_sets = st.session_state[Metadata].record_sets
154
  record_set: RecordSet
155
  for record_set_key, record_set in enumerate(record_sets):
156
+ title = f"**{record_set.name or '-'}** ({len(record_set.fields)} fields)"
157
  prefix = f"record-set-{record_set_key}"
158
  with st.expander(title, expanded=False):
159
  col1, col2 = st.columns([1, 3])
 
243
  )
244
  st.data_editor(
245
  fields,
246
+ # There is a bug with `st.data_editor` when the df is empty.
247
+ use_container_width=not fields.empty,
248
  num_rows="dynamic",
249
  key=data_editor_key,
250
  column_config={
 
275
  on_click=_handle_on_click_field,
276
  args=(record_set_key, record_set),
277
  )
278
+ st.button(
279
+ "Create a new RecordSet",
280
+ key=f"create-new-record-set",
281
+ type="primary",
282
+ on_click=_handle_create_record_set,
283
+ )
284
 
285
 
286
  def _render_right_panel():
views/splash.py CHANGED
@@ -40,8 +40,8 @@ def render_splash():
40
  )
41
  with st.expander("**Try out an example!**", expanded=True):
42
 
43
- def create_example():
44
- url = "https://raw.githubusercontent.com/mlcommons/croissant/main/datasets/titanic/metadata.json"
45
  try:
46
  json = requests.get(url).json()
47
  metadata = mlc.Metadata.from_json(mlc.Issues(), json, None)
@@ -50,16 +50,29 @@ def render_splash():
50
  jump_to(CurrentStep.editor)
51
  except Exception as exception:
52
  logging.error(exception)
53
- st.write(
54
  "Sorry, it seems that the example is broken... Can you please"
55
  " [open an issue on"
56
  " GitHub](https://github.com/mlcommons/croissant/issues/new)?"
57
  )
58
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  st.button(
60
- "Titanic dataset",
61
  on_click=create_example,
62
  type="primary",
 
63
  )
64
  with col2:
65
  with st.expander("**Past projects**", expanded=True):
 
40
  )
41
  with st.expander("**Try out an example!**", expanded=True):
42
 
43
+ def create_example(dataset: str):
44
+ url = f"https://raw.githubusercontent.com/mlcommons/croissant/main/datasets/{dataset.lower()}/metadata.json"
45
  try:
46
  json = requests.get(url).json()
47
  metadata = mlc.Metadata.from_json(mlc.Issues(), json, None)
 
50
  jump_to(CurrentStep.editor)
51
  except Exception as exception:
52
  logging.error(exception)
53
+ st.error(
54
  "Sorry, it seems that the example is broken... Can you please"
55
  " [open an issue on"
56
  " GitHub](https://github.com/mlcommons/croissant/issues/new)?"
57
  )
58
 
59
+ dataset = st.selectbox(
60
+ label="Dataset",
61
+ options=[
62
+ "Titanic",
63
+ "FLORES-200",
64
+ "GPT-3",
65
+ "COCO2014",
66
+ "PASS",
67
+ "MovieLens",
68
+ "Bigcode-The-Stack",
69
+ ],
70
+ )
71
  st.button(
72
+ f"{dataset} dataset",
73
  on_click=create_example,
74
  type="primary",
75
+ args=(dataset,),
76
  )
77
  with col2:
78
  with st.expander("**Past projects**", expanded=True):