Spaces:
Running
Running
removing more trailing s from units
Browse files
app.py
CHANGED
@@ -333,9 +333,9 @@ def filter_by_extension_month(_df, _extension):
|
|
333 |
|
334 |
# Update layout
|
335 |
fig.update_layout(
|
336 |
-
title="Monthly Additions of LFS Files by Extension (in TBs)",
|
337 |
xaxis_title="Date",
|
338 |
-
yaxis_title="Size (TBs)",
|
339 |
legend_title="Type",
|
340 |
yaxis=dict(tickformat=".2f"), # Format y-axis labels to 2 decimal places
|
341 |
)
|
@@ -431,7 +431,7 @@ with gr.Blocks(theme="citrus") as demo:
|
|
431 |
with gr.Column(scale=2):
|
432 |
gr.Markdown("### Current Storage Usage")
|
433 |
gr.Markdown(
|
434 |
-
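"As of September 20, 2024, total files stored in Git LFS summed to almost 29 PB. To put this into perspective, the last [Common Crawl](https://commoncrawl.org/) download was [451 TBs](https://github.com/commoncrawl/cc-crawl-statistics/blob/master/stats/crawler/CC-MAIN-2024-38.json#L31) - the Hub stores the equivalent of more than **64 Common Crawls** 🤯."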
|
435 |
)
|
436 |
with gr.Column(scale=3):
|
437 |
# Convert the total size to petabytes and format to two decimal places
|
@@ -457,14 +457,14 @@ with gr.Blocks(theme="citrus") as demo:
|
|
457 |
# drop the unnamed: 0 column
|
458 |
by_extension_size = by_extension_size.drop(columns=["Unnamed: 0"])
|
459 |
# average size
|
460 |
-
by_extension_size["Average File Size (MBs)"] = (
|
461 |
by_extension_size["size"].astype(float) / by_extension_size["count"]
|
462 |
)
|
463 |
-
by_extension_size["Average File Size (MBs)"] = (
|
464 |
-
by_extension_size["Average File Size (MBs)"] / 1e6
|
465 |
)
|
466 |
-
by_extension_size["Average File Size (MBs)"] = by_extension_size[
|
467 |
-
"Average File Size (MBs)"
|
468 |
].map("{:.2f}".format)
|
469 |
# format the size column
|
470 |
by_extension_size = format_dataframe_size_column(by_extension_size, ["size"])
|
@@ -487,7 +487,7 @@ with gr.Blocks(theme="citrus") as demo:
|
|
487 |
"File Extension",
|
488 |
"Total Size (PB)",
|
489 |
"Number of Files",
|
490 |
-
"Average File Size (MBs)",
|
491 |
]
|
492 |
]
|
493 |
)
|
@@ -501,7 +501,7 @@ with gr.Blocks(theme="citrus") as demo:
|
|
501 |
|
502 |
gr.HTML(div_px(5))
|
503 |
gr.Markdown(
|
504 |
-
"To dig deeper, use the dropdown to filter by file extension and see the bytes added (in TBs) each month for specific file types."
|
505 |
)
|
506 |
|
507 |
# get the unique values in the extension column and remove any empty strings
|
|
|
333 |
|
334 |
# Update layout
|
335 |
fig.update_layout(
|
336 |
+
title="Monthly Additions of LFS Files by Extension (in TB)",
|
337 |
xaxis_title="Date",
|
338 |
+
yaxis_title="Size (TB)",
|
339 |
legend_title="Type",
|
340 |
yaxis=dict(tickformat=".2f"), # Format y-axis labels to 2 decimal places
|
341 |
)
|
|
|
431 |
with gr.Column(scale=2):
|
432 |
gr.Markdown("### Current Storage Usage")
|
433 |
gr.Markdown(
|
434 |
+
"As of September 20, 2024, total files stored in Git LFS summed to almost 29 PB. To put this into perspective, the last [Common Crawl](https://commoncrawl.org/) download was [451 TB](https://github.com/commoncrawl/cc-crawl-statistics/blob/master/stats/crawler/CC-MAIN-2024-38.json#L31) - the Hub stores the equivalent of more than **64 Common Crawls** 🤯."
|
435 |
)
|
436 |
with gr.Column(scale=3):
|
437 |
# Convert the total size to petabytes and format to two decimal places
|
|
|
457 |
# drop the unnamed: 0 column
|
458 |
by_extension_size = by_extension_size.drop(columns=["Unnamed: 0"])
|
459 |
# average size
|
460 |
+
by_extension_size["Average File Size (MB)"] = (
|
461 |
by_extension_size["size"].astype(float) / by_extension_size["count"]
|
462 |
)
|
463 |
+
by_extension_size["Average File Size (MB)"] = (
|
464 |
+
by_extension_size["Average File Size (MB)"] / 1e6
|
465 |
)
|
466 |
+
by_extension_size["Average File Size (MB)"] = by_extension_size[
|
467 |
+
"Average File Size (MB)"
|
468 |
].map("{:.2f}".format)
|
469 |
# format the size column
|
470 |
by_extension_size = format_dataframe_size_column(by_extension_size, ["size"])
|
|
|
487 |
"File Extension",
|
488 |
"Total Size (PB)",
|
489 |
"Number of Files",
|
490 |
+
"Average File Size (MB)",
|
491 |
]
|
492 |
]
|
493 |
)
|
|
|
501 |
|
502 |
gr.HTML(div_px(5))
|
503 |
gr.Markdown(
|
504 |
+
"To dig deeper, use the dropdown to filter by file extension and see the bytes added (in TB) each month for specific file types."
|
505 |
)
|
506 |
|
507 |
# get the unique values in the extension column and remove any empty strings
|