jsulz HF staff committed on
Commit
eea405a
•
0 Parent(s):

initial commit

Browse files
Files changed (6) hide show
  1. .gitignore +1 -0
  2. README.md +11 -0
  3. app.py +115 -0
  4. poetry.lock +0 -0
  5. pyproject.toml +17 -0
  6. requirements.txt +54 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ __pycache__/
README.md ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Repo Info
3
+ emoji: 👁
4
+ colorFrom: red
5
+ colorTo: indigo
6
+ sdk: gradio
7
+ sdk_version: 5.5.0
8
+ app_file: app.py
9
+ pinned: false
10
+ short_description: Get file and branch stats about any public repo
11
+ ---
app.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # pylint: disable=no-member
2
+ import gradio as gr
3
+ import requests
4
+ from huggingface_hub import HfApi
5
+ import pandas as pd
6
+ import plotly.express as px
7
+
8
+ HF_API = HfApi()
9
+
10
+
11
def format_repo_size(r_size: int) -> str:
    """Render a byte count as a human-readable string with two decimals.

    The value is divided by 1024 until its magnitude drops below 1024 or the
    largest unit (PB) is reached, so anything beyond the petabyte range stays
    expressed in PB.

    Args:
        r_size: Size in bytes.

    Returns:
        The scaled value followed by its unit, e.g. ``"1.50 KB"``.
    """
    # A tuple indexed by the number of divisions performed.
    units = ("B", "KB", "MB", "GB", "TB", "PB")
    size = float(r_size)
    order = 0
    # Compare on magnitude so a negative value scales like its positive
    # counterpart instead of always being reported in bytes.
    while abs(size) >= 1024 and order < len(units) - 1:
        size /= 1024
        order += 1
    return f"{size:.2f} {units[order]}"
18
+
19
+
20
def repo_files(r_type: str, r_id: str) -> dict:
    """Aggregate a repository's files by extension.

    Fetches the repo metadata through the module-level ``HF_API`` client with
    ``files_metadata=True`` and folds every listed file into a per-extension
    total.

    Args:
        r_type: Repository type passed to ``repo_info`` as ``repo_type``.
        r_id: Repository ID passed to ``repo_info`` as ``repo_id``.

    Returns:
        A dict mapping each extension (without the leading dot) to
        ``{"size": <total bytes>, "count": <number of files>}``. Files whose
        name contains no dot are grouped under ``"(none)"``.
    """
    r_info = HF_API.repo_info(repo_id=r_id, repo_type=r_type, files_metadata=True)
    files = {}
    # NOTE(review): `siblings` and `size` are treated as possibly None here;
    # confirm against the huggingface_hub RepoSibling definition.
    for sibling in r_info.siblings or []:
        # Only the final path component may contribute an extension; dots in
        # directory names (e.g. "v1.0/README") must not be mistaken for one.
        basename = sibling.rfilename.rsplit("/", 1)[-1]
        _, dot, suffix = basename.rpartition(".")
        ext = suffix if dot else "(none)"
        stats = files.setdefault(ext, {"size": 0, "count": 0})
        stats["size"] += sibling.size or 0
        stats["count"] += 1
    return files
33
+
34
+
35
def repo_size(r_type, r_id, timeout=30):
    """Look up the total size of every branch of a repository.

    Branches are listed through the module-level ``HF_API`` client, then the
    Hub ``treesize`` endpoint is queried once per branch. This is best-effort:
    a branch whose request fails, or whose response carries no ``"size"``
    field, is left out of the result rather than raising.

    Args:
        r_type: Repository type; pluralised with an ``s`` to form the API path.
        r_id: Repository ID.
        timeout: Seconds to wait for each HTTP request before skipping the
            branch.

    Returns:
        A dict mapping branch name to the ``"size"`` value reported by the API.
    """
    from urllib.parse import quote

    r_refs = HF_API.list_repo_refs(repo_id=r_id, repo_type=r_type)
    repo_sizes = {}
    for branch in r_refs.branches:
        # Branch names may contain "/" or other reserved characters, so the
        # revision must be percent-encoded as a single path segment.
        revision = quote(branch.name, safe="")
        url = f"https://huggingface.co/api/{r_type}s/{r_id}/treesize/{revision}"
        try:
            payload = requests.get(url, timeout=timeout).json()
        except (requests.RequestException, ValueError):
            # Network failure or a non-JSON body: skip this branch only.
            continue

        size = payload.get("size") if isinstance(payload, dict) else None
        if size is not None:
            repo_sizes[branch.name] = size
    return repo_sizes
52
+
53
+
54
def get_repo_info(r_type, r_id):
    """Collect file and branch statistics for a repo and build the UI updates.

    Calls ``repo_size`` and ``repo_files``, turns their results into two
    DataFrames with a human-readable ``Size`` column, and draws a donut chart
    of bytes per file extension.

    Args:
        r_type: Repository type selected in the UI.
        r_id: Repository ID entered in the UI.

    Returns:
        A 4-tuple of Gradio components, in the order of the click handler's
        ``outputs``: the now-visible results row, the per-extension table,
        the pie chart, and the per-branch table.
    """
    repo_sizes = repo_size(r_type, r_id)
    repo_files_info = repo_files(r_type, r_id)

    # Build both frames with explicit, named columns so that an empty result
    # still yields the expected columns instead of failing on a missing one.
    rf_sizes_df = pd.DataFrame(
        [
            {"Extension": ext, "bytes": stats["size"], "Count": stats["count"]}
            for ext, stats in repo_files_info.items()
        ],
        columns=["Extension", "bytes", "Count"],
    ).sort_values(by="bytes", ascending=False)
    r_sizes_df = pd.DataFrame(list(repo_sizes.items()), columns=["Branch", "bytes"])

    rf_sizes_df["Size"] = rf_sizes_df["bytes"].apply(format_repo_size)
    r_sizes_df["Size"] = r_sizes_df["bytes"].apply(format_repo_size)

    rf_sizes_plot = px.pie(
        rf_sizes_df,
        values="bytes",
        names="Extension",
        hover_data=["Size"],
        title=f"File Distribution in {r_id}",
        hole=0.3,
    )
    return (
        gr.Row(visible=True),
        gr.Dataframe(
            value=rf_sizes_df[["Extension", "Count", "Size"]],
            visible=True,
        ),
        gr.Plot(rf_sizes_plot, visible=True),
        gr.Dataframe(value=r_sizes_df[["Branch", "Size"]], visible=True),
    )
84
+
85
+
86
# UI definition: a search form on top and a results area that stays hidden
# until get_repo_info returns components with visible=True.
with gr.Blocks(theme="citrus") as demo:
    gr.Markdown("# Repository Information")
    gr.Markdown(
        "Enter a repository ID and repository type and get back information about the repository's files and branches."
    )
    # Search form.
    with gr.Blocks():
        repo_id = gr.Textbox(label="Repository ID", placeholder="123456")
        repo_type = gr.Radio(
            choices=["model", "dataset", "space"],
            label="Repository Type",
            value="model",
        )
        search_button = gr.Button(value="Search")
    # Results area, revealed by the click handler below.
    with gr.Blocks():
        with gr.Row(visible=False) as results:
            with gr.Column():
                gr.Markdown("## File Information")
                with gr.Row():
                    file_info = gr.Dataframe(visible=False)
                    file_info_plot = gr.Plot(visible=False)
                gr.Markdown("## Branch Sizes")
                branch_sizes = gr.Dataframe(visible=False)

    # The order of `inputs` matches get_repo_info(r_type, r_id); the order of
    # `outputs` matches the 4-tuple it returns.
    search_button.click(
        get_repo_info,
        inputs=[repo_type, repo_id],
        outputs=[results, file_info, file_info_plot, branch_sizes],
    )

# Start the server only when run as a script, so importing this module
# (e.g. to reuse the helpers) does not launch the app.
if __name__ == "__main__":
    demo.launch()
poetry.lock ADDED
The diff for this file is too large to render. See raw diff
 
pyproject.toml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [tool.poetry]
2
+ name = "repo-info"
3
+ version = "0.1.0"
4
+ description = ""
5
+ authors = ["jsulz <[email protected]>"]
6
+ readme = "README.md"
7
+
8
+ [tool.poetry.dependencies]
9
+ python = "^3.12"
10
+ gradio = "^5.5.0"
11
+ huggingface-hub = "^0.26.2"
12
+ plotly = "^5.24.1"
13
+
14
+
15
+ [build-system]
16
+ requires = ["poetry-core"]
17
+ build-backend = "poetry.core.masonry.api"
requirements.txt ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.2.1
2
+ annotated-types==0.7.0
3
+ anyio==4.6.2.post1
4
+ certifi==2024.8.30
5
+ charset-normalizer==3.4.0
6
+ click==8.1.7
7
+ colorama==0.4.6
8
+ fastapi==0.115.5
9
+ ffmpy==0.4.0
10
+ filelock==3.16.1
11
+ fsspec==2024.10.0
12
+ gradio-client==1.4.2
13
+ gradio==5.5.0
14
+ h11==0.14.0
15
+ httpcore==1.0.6
16
+ httpx==0.27.2
17
+ huggingface-hub==0.26.2
18
+ idna==3.10
19
+ jinja2==3.1.4
20
+ markdown-it-py==3.0.0
21
+ markupsafe==2.1.5
22
+ mdurl==0.1.2
23
+ numpy==2.1.3
24
+ orjson==3.10.11
25
+ packaging==24.2
26
+ pandas==2.2.3
27
+ pillow==11.0.0
28
+ plotly==5.24.1
29
+ pydantic-core==2.23.4
30
+ pydantic==2.9.2
31
+ pydub==0.25.1
32
+ pygments==2.18.0
33
+ python-dateutil==2.9.0.post0
34
+ python-multipart==0.0.12
35
+ pytz==2024.2
36
+ pyyaml==6.0.2
37
+ requests==2.32.3
38
+ rich==13.9.4
39
+ ruff==0.7.3
40
+ safehttpx==0.1.1
41
+ semantic-version==2.10.0
42
+ shellingham==1.5.4
43
+ six==1.16.0
44
+ sniffio==1.3.1
45
+ starlette==0.41.2
46
+ tenacity==9.0.0
47
+ tomlkit==0.12.0
48
+ tqdm==4.67.0
49
+ typer==0.13.0
50
+ typing-extensions==4.12.2
51
+ tzdata==2024.2
52
+ urllib3==2.2.3
53
+ uvicorn==0.32.0
54
+ websockets==12.0