Spaces:
Running
on
Zero
Running
on
Zero
chenlin
committed on
Commit
•
abbf8c3
1
Parent(s):
bdcb3d3
init
Browse files
- .gitattributes +1 -0
- .gitignore +1 -0
- app.py +110 -0
- pyproject.toml +37 -0
- requirements.txt +29 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
*.mp4 filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
__pycache__
|
app.py
ADDED
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import shutil
|
3 |
+
import tempfile
|
4 |
+
|
5 |
+
import spaces
|
6 |
+
import gradio as gr
|
7 |
+
import torch
|
8 |
+
|
9 |
+
# HTML banner rendered at the top of the demo page via gr.Markdown:
# project logo (linking to the GitHub repository), title, and links to the
# project page, code and paper.
# Fix: the second <h5> (the link row) was never closed, leaving the markup
# unbalanced; it now ends with </h5>.
title_markdown = ("""
<div style="display: flex; justify-content: flex-start; align-items: center; text-align: center;">
<div style="margin-right: 20px; display: flex; align-items: center;">
<a href="https://github.com/ShareGPT4Omni/ShareGPT4Video" style="text-decoration: none; display: flex; align-items: center;">
<img src="https://raw.githubusercontent.com/ShareGPT4V/ShareGPT4V-Resources/master/images/share4video_tight.png" alt="ShareGPT4Video🚀" style="max-width: 120px; height: auto;">
</a>
</div>
<div>
<h1>ShareGPT4Video: Improving Video Understanding and Generation with Better Captions</h1>
<h5 style="margin: 0;">If you like our project, please give us a star ✨ on Github for the latest update.</h5>
<h5 style="margin: 0;"> <a href="https://sharegpt4video.github.io/">[Project Page]</a> <a href="https://github.com/ShareGPT4Omni/ShareGPT4Video">[Code]</a> <a href="https://arxiv.org/abs/2406.04325v1">[Paper]</a></h5>
</div>
</div>
""")
|
23 |
+
|
24 |
+
block_css = """
|
25 |
+
#buttons button {
|
26 |
+
min-width: min(120px,100%);
|
27 |
+
}
|
28 |
+
"""
|
29 |
+
|
30 |
+
learn_more_markdown = ("""
|
31 |
+
### License
|
32 |
+
The service is a research preview intended for non-commercial use only, subject to the model [License](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) of LLaMA, [Terms of Use](https://openai.com/policies/terms-of-use) of the data generated by OpenAI, and [Privacy Practices](https://chrome.google.com/webstore/detail/sharegpt-share-your-chatg/daiacboceoaocpibfodeljbdfacokfjb) of ShareGPT. Please contact us if you find any potential violation.
|
33 |
+
""")
|
34 |
+
|
35 |
+
|
36 |
+
model_path = ''
|
37 |
+
device = 'cuda'
|
38 |
+
load_8bit = False
|
39 |
+
load_4bit = False
|
40 |
+
dtype = torch.float16
|
41 |
+
|
42 |
+
|
43 |
+
@spaces.GPU(duration=60)
def generate_slidingcaptioning(video):
    """Placeholder handler for the "Sliding Captioning" button.

    Args:
        video: Value of the "Input Video" component; currently unused.

    Returns:
        The fixed string ``'text'``; no captioning is performed yet.
    """
    placeholder_caption = 'text'
    return placeholder_caption
|
46 |
+
|
47 |
+
@spaces.GPU(duration=60)
def generate_fastcaptioning(video):
    """Placeholder handler for the "Fast Captioning" button.

    Args:
        video: Value of the "Input Video" component; currently unused.

    Returns:
        The fixed string ``'text'``; no captioning is performed yet.
    """
    placeholder_caption = 'text'
    return placeholder_caption
|
50 |
+
|
51 |
+
@spaces.GPU(duration=60)
def generate_promptrecaptioning(text):
    """Placeholder handler for the "Prompt Re-captioning" button.

    Args:
        text: Value of the "Input Text" textbox.

    Returns:
        ``text`` itself, unchanged; no re-captioning is performed yet.
    """
    echoed_prompt = text
    return echoed_prompt
|
54 |
+
|
55 |
+
def save_video_to_local(video_path):
    """Copy a video into the local ``temp`` directory under a random name.

    Args:
        video_path: Path of the existing video file to copy.

    Returns:
        The relative path ``temp/<random>.mp4`` of the new copy.

    Raises:
        OSError: If ``video_path`` cannot be read or the copy cannot be
            written.
    """
    target_dir = 'temp'
    # Nothing visible in this file creates the directory; without this call
    # shutil.copyfile fails with FileNotFoundError on a fresh checkout.
    os.makedirs(target_dir, exist_ok=True)
    # Random hex name from the public os.urandom API, replacing the private
    # tempfile._get_candidate_names() helper the original relied on.
    filename = os.path.join(target_dir, os.urandom(8).hex() + '.mp4')
    shutil.copyfile(video_path, filename)
    return filename
|
60 |
+
|
61 |
+
with gr.Blocks(title='ShareCaptioner-Video', theme=gr.themes.Default(), css=block_css) as demo:
|
62 |
+
gr.Markdown(title_markdown)
|
63 |
+
state = gr.State()
|
64 |
+
state_ = gr.State()
|
65 |
+
first_run = gr.State()
|
66 |
+
|
67 |
+
with gr.Row():
|
68 |
+
gr.Markdown("### The ShareCaptioner-Video is a Four-in-One exceptional video captioning model with the following capabilities:\n1. Fast captioning, 2. Sliding Captioning, 3. Clip Summarizing, 4. Prompt Re-Captioning")
|
69 |
+
with gr.Row():
|
70 |
+
gr.Markdown("(THE DEMO OF \"Clip Summarizing\" IS COMING SOON...)")
|
71 |
+
with gr.Row():
|
72 |
+
with gr.Column(scale=6):
|
73 |
+
with gr.Row():
|
74 |
+
video = gr.Video(label="Input Video")
|
75 |
+
cur_dir = os.path.dirname(os.path.abspath(__file__))
|
76 |
+
with gr.Row():
|
77 |
+
textbox = gr.Textbox(
|
78 |
+
show_label=False, placeholder="Input Text", container=False
|
79 |
+
)
|
80 |
+
with gr.Row():
|
81 |
+
with gr.Column(scale=2, min_width=50):
|
82 |
+
submit_btn_sc = gr.Button(
|
83 |
+
value="Sliding Captioning", variant="primary", interactive=True
|
84 |
+
)
|
85 |
+
with gr.Column(scale=2, min_width=50):
|
86 |
+
submit_btn_fc = gr.Button(
|
87 |
+
value="Fast Captioning", variant="primary", interactive=True
|
88 |
+
)
|
89 |
+
with gr.Column(scale=2, min_width=50):
|
90 |
+
submit_btn_pr = gr.Button(
|
91 |
+
value="Prompt Re-captioning", variant="primary", interactive=True
|
92 |
+
)
|
93 |
+
with gr.Column(scale=4, min_width=200):
|
94 |
+
with gr.Row():
|
95 |
+
textbox_out = gr.Textbox(
|
96 |
+
show_label=False, placeholder="Output", container=False
|
97 |
+
)
|
98 |
+
gr.Markdown(learn_more_markdown)
|
99 |
+
|
100 |
+
submit_btn_sc.click(generate_slidingcaptioning, [video],[textbox_out])
|
101 |
+
submit_btn_fc.click(generate_fastcaptioning, [video], [textbox_out])
|
102 |
+
submit_btn_pr.click(generate_promptrecaptioning, [textbox], [textbox_out])
|
103 |
+
|
104 |
+
# Start the Gradio server.
# NOTE(review): Hugging Face Spaces is documented to export SPACE_ID inside a
# running Space; it is used here to choose the launch mode automatically
# instead of toggling commented-out code. Confirm against the Spaces docs.
if os.environ.get("SPACE_ID"):
    # Hosted on Hugging Face Spaces: use Gradio's default host/port settings.
    demo.launch()
else:
    # Local launch: listen on all interfaces on a fixed port and request a
    # public share link (unchanged from the original local configuration).
    demo.launch(server_name="0.0.0.0",
                server_port=28358,
                share=True)
|
pyproject.toml
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[build-system]
|
2 |
+
requires = ["setuptools>=61.0"]
|
3 |
+
build-backend = "setuptools.build_meta"
|
4 |
+
|
5 |
+
[project]
|
6 |
+
name = "llava"
|
7 |
+
version = "1.2.2.post1"
|
8 |
+
description = "Towards GPT-4 like large language and visual assistant."
|
9 |
+
readme = "README.md"
|
10 |
+
requires-python = ">=3.8"
|
11 |
+
classifiers = [
|
12 |
+
"Programming Language :: Python :: 3",
|
13 |
+
"License :: OSI Approved :: Apache Software License",
|
14 |
+
]
|
15 |
+
dependencies = [
|
16 |
+
"torch==2.1.2", "torchvision==0.16.2",
|
17 |
+
"transformers==4.37.2", "tokenizers==0.15.1", "sentencepiece==0.1.99", "shortuuid",
|
18 |
+
"accelerate==0.21.0", "peft", "bitsandbytes",
|
19 |
+
"pydantic", "markdown2[all]", "numpy", "scikit-learn==1.2.2",
|
20 |
+
"gradio==4.16.0", "gradio_client==0.8.1", "openai", "spaces",
|
21 |
+
"requests", "httpx==0.24.0", "uvicorn", "fastapi", "decord",
|
22 |
+
"einops==0.6.1", "einops-exts==0.0.4", "timm==0.6.13",
|
23 |
+
]
|
24 |
+
|
25 |
+
[project.optional-dependencies]
|
26 |
+
train = ["deepspeed==0.12.6", "ninja", "wandb"]
|
27 |
+
build = ["build", "twine"]
|
28 |
+
|
29 |
+
[project.urls]
|
30 |
+
"Homepage" = "https://llava-vl.github.io"
|
31 |
+
"Bug Tracker" = "https://github.com/haotian-liu/LLaVA/issues"
|
32 |
+
|
33 |
+
[tool.setuptools.packages.find]
|
34 |
+
exclude = ["assets*", "benchmark*", "docs", "dist*", "playground*", "scripts*", "tests*"]
|
35 |
+
|
36 |
+
[tool.wheel]
|
37 |
+
exclude = ["assets*", "benchmark*", "docs", "dist*", "playground*", "scripts*", "tests*"]
|
requirements.txt
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
torch==2.1.2
|
2 |
+
torchvision==0.16.2
|
3 |
+
transformers==4.37.2
|
4 |
+
tokenizers==0.15.1
|
5 |
+
sentencepiece==0.1.99
|
6 |
+
accelerate==0.21.0
|
7 |
+
peft
|
8 |
+
bitsandbytes
|
9 |
+
pydantic
|
10 |
+
markdown2[all]
|
11 |
+
numpy
|
12 |
+
scikit-learn==1.2.2
|
13 |
+
gradio==4.16.0
|
14 |
+
gradio_client==0.8.1
|
15 |
+
openai
|
16 |
+
spaces
|
17 |
+
requests
|
18 |
+
httpx==0.24.0
|
19 |
+
uvicorn
|
20 |
+
fastapi
|
21 |
+
decord
|
22 |
+
einops==0.6.1
|
23 |
+
einops-exts==0.0.4
|
24 |
+
timm==0.6.13
|
25 |
+
deepspeed==0.12.6
|
26 |
+
ninja
|
27 |
+
wandb
|
28 |
+
build
|
29 |
+
twine
|