Commit
•
eea405a
0
Parent(s):
initial commit
Browse files- .gitignore +1 -0
- README.md +11 -0
- app.py +115 -0
- poetry.lock +0 -0
- pyproject.toml +17 -0
- requirements.txt +54 -0
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
__pycache__/
|
README.md
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: Repo Info
|
3 |
+
emoji: π
|
4 |
+
colorFrom: red
|
5 |
+
colorTo: indigo
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: 5.5.0
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
short_description: Get file and branch stats about any public repo
|
11 |
+
---
|
app.py
ADDED
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# pylint: disable=no-member
|
2 |
+
import gradio as gr
|
3 |
+
import requests
|
4 |
+
from huggingface_hub import HfApi
|
5 |
+
import pandas as pd
|
6 |
+
import plotly.express as px
|
7 |
+
|
8 |
+
# Module-level Hugging Face Hub client, created once with default settings
# and shared by the functions in this file that query the Hub.
HF_API = HfApi()
|
9 |
+
|
10 |
+
|
11 |
+
def format_repo_size(r_size: int) -> str:
    """Render a byte count as a human-readable string using 1024-based units.

    Args:
        r_size: Size in bytes. Negative values are scaled by their magnitude
            and keep their sign.

    Returns:
        The scaled value with two decimals followed by its unit, e.g.
        ``"1.50 KB"``. Values of 1024 PB or more stay expressed in PB because
        PB is the largest unit available.
    """
    units = ("B", "KB", "MB", "GB", "TB", "PB")
    order = 0
    # Compare on the magnitude so negative inputs are scaled like positive
    # ones instead of always being reported in bytes.
    while abs(r_size) >= 1024 and order < len(units) - 1:
        r_size /= 1024
        order += 1
    return f"{r_size:.2f} {units[order]}"
|
18 |
+
|
19 |
+
|
20 |
+
def repo_files(r_type: str, r_id: str) -> dict:
    """Aggregate a repository's files by extension.

    Args:
        r_type: Repository type, forwarded to the Hub client as ``repo_type``.
        r_id: Repository ID, forwarded to the Hub client as ``repo_id``.

    Returns:
        A dict mapping each extension to ``{"size": total_bytes, "count":
        number_of_files}``. The extension is the text after the last dot of
        the file name; a file name without any dot is keyed by the whole
        file name.
    """
    r_info = HF_API.repo_info(repo_id=r_id, repo_type=r_type, files_metadata=True)
    files = {}
    # NOTE(review): `siblings` and `size` are treated as possibly None here;
    # confirm against the huggingface_hub type definitions.
    for sibling in r_info.siblings or []:
        # Strip the directory part first so a dot inside a folder name
        # (e.g. "ckpt.v2/weights") is not mistaken for an extension separator.
        file_name = sibling.rfilename.rsplit("/", 1)[-1]
        ext = file_name.rsplit(".", 1)[-1]
        stats = files.setdefault(ext, {"size": 0, "count": 0})
        stats["size"] += sibling.size or 0
        stats["count"] += 1
    return files
|
33 |
+
|
34 |
+
|
35 |
+
def repo_size(r_type, r_id):
    """Fetch the total size of every branch of a repository.

    For each branch returned by ``list_repo_refs`` the Hub ``treesize``
    endpoint is queried. Lookups are best-effort: a branch whose request
    fails, whose response is not valid JSON, or whose payload has no
    ``"size"`` entry is left out of the result.

    Args:
        r_type: Repository type; an ``"s"`` is appended to build the API path.
        r_id: Repository ID.

    Returns:
        A dict mapping branch name to the ``"size"`` value reported by the
        endpoint.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import quote

    r_refs = HF_API.list_repo_refs(repo_id=r_id, repo_type=r_type)
    repo_sizes = {}
    for branch in r_refs.branches:
        # Percent-encode the branch so a name containing "/" stays a single
        # path segment of the URL.
        revision = quote(branch.name, safe="")
        try:
            response = requests.get(
                f"https://huggingface.co/api/{r_type}s/{r_id}/treesize/{revision}",
                # NOTE(review): 1000 seconds per branch is very long for an
                # interactive app -- confirm this is intentional.
                timeout=1000,
            )
            payload = response.json()
        except (requests.RequestException, ValueError):
            # Network failure or non-JSON body: skip this branch.
            continue

        # Guard against a JSON payload that is not an object.
        size = payload.get("size") if isinstance(payload, dict) else None
        if size is not None:
            repo_sizes[branch.name] = size
    return repo_sizes
|
52 |
+
|
53 |
+
|
54 |
+
def get_repo_info(r_type, r_id):
    """Collect file and branch statistics for a repository and build the UI updates.

    Args:
        r_type: Repository type, passed through to ``repo_size`` and ``repo_files``.
        r_id: Repository ID, passed through to ``repo_size`` and ``repo_files``.

    Returns:
        A 4-tuple of Gradio components, all with ``visible=True``:
        a ``Row``, a ``Dataframe`` of per-extension count and size, a ``Plot``
        holding a pie chart of bytes per extension, and a ``Dataframe`` of
        per-branch sizes.
    """
    repo_sizes = repo_size(r_type, r_id)
    repo_files_info = repo_files(r_type, r_id)

    # Build both tables with explicit columns so that an empty result still
    # yields a well-formed (empty) frame instead of failing on a missing
    # "size" column, and so column names do not depend on dict key order.
    rf_sizes_df = pd.DataFrame(
        [
            {"Extension": ext, "bytes": stats["size"], "Count": stats["count"]}
            for ext, stats in repo_files_info.items()
        ],
        columns=["Extension", "bytes", "Count"],
    ).sort_values(by="bytes", ascending=False)
    rf_sizes_df["Size"] = rf_sizes_df["bytes"].apply(format_repo_size)

    r_sizes_df = pd.DataFrame(
        list(repo_sizes.items()),
        columns=["Branch", "bytes"],
    )
    r_sizes_df["Size"] = r_sizes_df["bytes"].apply(format_repo_size)

    rf_sizes_plot = px.pie(
        rf_sizes_df,
        values="bytes",
        names="Extension",
        hover_data=["Size"],
        title=f"File Distribution in {r_id}",
        hole=0.3,
    )
    return (
        gr.Row(visible=True),
        gr.Dataframe(
            value=rf_sizes_df[["Extension", "Count", "Size"]],
            visible=True,
        ),
        gr.Plot(rf_sizes_plot, visible=True),
        gr.Dataframe(value=r_sizes_df[["Branch", "Size"]], visible=True),
    )
|
84 |
+
|
85 |
+
|
86 |
+
# Gradio UI: a repository ID/type form, a search button, and an initially
# hidden results area that get_repo_info reveals when it returns.
# NOTE(review): block nesting below was reconstructed from a flattened diff
# view -- confirm against the original app.py.
with gr.Blocks(theme="citrus") as demo:
    gr.Markdown("# Repository Information")
    gr.Markdown(
        "Enter a repository ID and repository type and get back information about the repository's files and branches."
    )
    # Input form.
    with gr.Blocks():
        repo_id = gr.Textbox(label="Repository ID", placeholder="123456")
        repo_type = gr.Radio(
            choices=["model", "dataset", "space"],
            label="Repository Type",
            value="model",
        )
        search_button = gr.Button(value="Search")
    # Results area; every component starts with visible=False.
    with gr.Blocks():
        with gr.Row(visible=False) as results:
            with gr.Column():
                gr.Markdown("## File Information")
                with gr.Row():
                    file_info = gr.Dataframe(visible=False)
                    file_info_plot = gr.Plot(visible=False)
                gr.Markdown("## Branch Sizes")
                branch_sizes = gr.Dataframe(visible=False)

    # inputs are ordered (repo_type, repo_id) to match get_repo_info(r_type, r_id);
    # outputs are ordered to match the 4-tuple it returns.
    search_button.click(
        get_repo_info,
        inputs=[repo_type, repo_id],
        outputs=[results, file_info, file_info_plot, branch_sizes],
    )

# Starts the app as soon as the module is executed.
demo.launch()
|
poetry.lock
ADDED
The diff for this file is too large to render.
See raw diff
|
|
pyproject.toml
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[tool.poetry]
|
2 |
+
name = "repo-info"
|
3 |
+
version = "0.1.0"
|
4 |
+
description = ""
|
5 |
+
authors = ["jsulz <[email protected]>"]
|
6 |
+
readme = "README.md"
|
7 |
+
|
8 |
+
[tool.poetry.dependencies]
|
9 |
+
python = "^3.12"
|
10 |
+
gradio = "^5.5.0"
|
11 |
+
huggingface-hub = "^0.26.2"
|
12 |
+
plotly = "^5.24.1"
|
13 |
+
|
14 |
+
|
15 |
+
[build-system]
|
16 |
+
requires = ["poetry-core"]
|
17 |
+
build-backend = "poetry.core.masonry.api"
|
requirements.txt
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
aiofiles==23.2.1
|
2 |
+
annotated-types==0.7.0
|
3 |
+
anyio==4.6.2.post1
|
4 |
+
certifi==2024.8.30
|
5 |
+
charset-normalizer==3.4.0
|
6 |
+
click==8.1.7
|
7 |
+
colorama==0.4.6
|
8 |
+
fastapi==0.115.5
|
9 |
+
ffmpy==0.4.0
|
10 |
+
filelock==3.16.1
|
11 |
+
fsspec==2024.10.0
|
12 |
+
gradio-client==1.4.2
|
13 |
+
gradio==5.5.0
|
14 |
+
h11==0.14.0
|
15 |
+
httpcore==1.0.6
|
16 |
+
httpx==0.27.2
|
17 |
+
huggingface-hub==0.26.2
|
18 |
+
idna==3.10
|
19 |
+
jinja2==3.1.4
|
20 |
+
markdown-it-py==3.0.0
|
21 |
+
markupsafe==2.1.5
|
22 |
+
mdurl==0.1.2
|
23 |
+
numpy==2.1.3
|
24 |
+
orjson==3.10.11
|
25 |
+
packaging==24.2
|
26 |
+
pandas==2.2.3
|
27 |
+
pillow==11.0.0
|
28 |
+
plotly==5.24.1
|
29 |
+
pydantic-core==2.23.4
|
30 |
+
pydantic==2.9.2
|
31 |
+
pydub==0.25.1
|
32 |
+
pygments==2.18.0
|
33 |
+
python-dateutil==2.9.0.post0
|
34 |
+
python-multipart==0.0.12
|
35 |
+
pytz==2024.2
|
36 |
+
pyyaml==6.0.2
|
37 |
+
requests==2.32.3
|
38 |
+
rich==13.9.4
|
39 |
+
ruff==0.7.3
|
40 |
+
safehttpx==0.1.1
|
41 |
+
semantic-version==2.10.0
|
42 |
+
shellingham==1.5.4
|
43 |
+
six==1.16.0
|
44 |
+
sniffio==1.3.1
|
45 |
+
starlette==0.41.2
|
46 |
+
tenacity==9.0.0
|
47 |
+
tomlkit==0.12.0
|
48 |
+
tqdm==4.67.0
|
49 |
+
typer==0.13.0
|
50 |
+
typing-extensions==4.12.2
|
51 |
+
tzdata==2024.2
|
52 |
+
urllib3==2.2.3
|
53 |
+
uvicorn==0.32.0
|
54 |
+
websockets==12.0
|