Spaces:

Lin-Chen
/

ShareCaptioner-Video

Running on Zero

App Files Files Community

chenlin committed on Jun 12

Commit

abbf8c3

•

1 Parent(s): bdcb3d3

init

Browse files

Files changed (5) hide show

.gitattributes +1 -0
.gitignore +1 -0
app.py +110 -0
pyproject.toml +37 -0
requirements.txt +29 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.mp4 filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ __pycache__

app.py ADDED Viewed

	@@ -0,0 +1,110 @@

+import os
+import shutil
+import tempfile
+import spaces
+import gradio as gr
+import torch
+# HTML header shown at the top of the demo page: project logo, title, a
+# request to star the repository, and links to the project page, code and paper.
+# The second <h5> is now explicitly closed so the markup is well-formed.
+title_markdown = ("""
+<div style="display: flex; justify-content: flex-start; align-items: center; text-align: center;">
+  <div style="margin-right: 20px; display: flex; align-items: center;">
+    <a href="https://github.com/ShareGPT4Omni/ShareGPT4Video" style="text-decoration: none; display: flex; align-items: center;">
+      <img src="https://raw.githubusercontent.com/ShareGPT4V/ShareGPT4V-Resources/master/images/share4video_tight.png" alt="ShareGPT4Video🚀" style="max-width: 120px; height: auto;">
+    </a>
+  </div>
+  <div>
+    <h1>ShareGPT4Video: Improving Video Understanding and Generation with Better Captions</h1>
+    <h5 style="margin: 0;">If you like our project, please give us a star ✨ on Github for the latest update.</h5>
+    <h5 style="margin: 0;"> <a href="https://sharegpt4video.github.io/">[Project Page]</a> <a href="https://github.com/ShareGPT4Omni/ShareGPT4Video">[Code]</a> <a href="https://arxiv.org/abs/2406.04325v1">[Paper]</a></h5>
+  </div>
+</div>
+""")
+# Extra CSS passed to gr.Blocks: gives buttons inside the element with id
+# "buttons" a minimum width.
+block_css = """
+#buttons button {
+    min-width: min(120px,100%);
+}
+"""
+# Licence / terms notice rendered as Markdown below the main layout.
+learn_more_markdown = """
+### License
+The service is a research preview intended for non-commercial use only, subject to the model [License](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) of LLaMA, [Terms of Use](https://openai.com/policies/terms-of-use) of the data generated by OpenAI, and [Privacy Practices](https://chrome.google.com/webstore/detail/sharegpt-share-your-chatg/daiacboceoaocpibfodeljbdfacokfjb) of ShareGPT. Please contact us if you find any potential violation.
+"""
+# Model-loading settings.
+# NOTE(review): none of these are read by the code visible in this file;
+# presumably they are placeholders for the real captioner - confirm.
+model_path = ""
+device = "cuda"
+load_8bit = load_4bit = False
+dtype = torch.float16
+@spaces.GPU(duration=60)
+def generate_slidingcaptioning(video):
+    """Placeholder handler for the "Sliding Captioning" button.
+
+    The ``video`` argument is currently ignored; the fixed string ``'text'``
+    is returned as the caption.
+    """
+    placeholder_caption = 'text'
+    return placeholder_caption
+@spaces.GPU(duration=60)
+def generate_fastcaptioning(video):
+    """Placeholder handler for the "Fast Captioning" button.
+
+    The ``video`` argument is currently ignored; the fixed string ``'text'``
+    is returned as the caption.
+    """
+    placeholder_caption = 'text'
+    return placeholder_caption
+@spaces.GPU(duration=60)
+def generate_promptrecaptioning(text):
+    """Placeholder handler for the "Prompt Re-captioning" button.
+
+    Currently an identity function: the input ``text`` is returned unchanged.
+    """
+    recaptioned = text
+    return recaptioned
+def save_video_to_local(video_path):
+    """Copy a video file into the local ``temp`` directory under a random name.
+
+    Args:
+        video_path: Path of the source video file.
+
+    Returns:
+        Relative path of the copy, of the form ``temp/<random name>.mp4``.
+
+    Raises:
+        OSError: If the source cannot be read or the copy cannot be written.
+    """
+    # Imported locally so the function does not depend on a new file-level import.
+    import uuid
+
+    # The target directory is not created anywhere else in this file, so make
+    # sure it exists before copying into it.
+    os.makedirs('temp', exist_ok=True)
+    # A public API (uuid4) replaces the private tempfile._get_candidate_names().
+    filename = os.path.join('temp', uuid.uuid4().hex + '.mp4')
+    shutil.copyfile(video_path, filename)
+    return filename
+# Build the Gradio UI. Components are created inside nested Row/Column
+# contexts, so the statement order below defines the page layout.
+with gr.Blocks(title='ShareCaptioner-Video', theme=gr.themes.Default(), css=block_css) as demo:
+    # Page header (logo, title, links).
+    gr.Markdown(title_markdown)
+    # NOTE(review): these three State holders are created but never referenced
+    # again in the visible code.
+    state = gr.State()
+    state_ = gr.State()
+    first_run = gr.State()
+    with gr.Row():
+        gr.Markdown("### The ShareCaptioner-Video is a Four-in-One exceptional video captioning model with the following capabilities:\n1. Fast captioning, 2. Sliding Captioning, 3. Clip Summarizing, 4. Prompt Re-Captioning")
+    with gr.Row():
+        gr.Markdown("(THE DEMO OF \"Clip Summarizing\" IS COMING SOON...)")
+    with gr.Row():
+        # Left column (scale 6): inputs and the three action buttons.
+        with gr.Column(scale=6):
+            with gr.Row():
+                video = gr.Video(label="Input Video")
+                # NOTE(review): cur_dir is computed but not used in the visible code.
+                cur_dir = os.path.dirname(os.path.abspath(__file__))
+            with gr.Row():
+                # Text input consumed by the "Prompt Re-captioning" button.
+                textbox = gr.Textbox(
+                    show_label=False, placeholder="Input Text", container=False
+                )
+            with gr.Row():
+                with gr.Column(scale=2, min_width=50):
+                    submit_btn_sc = gr.Button(
+                        value="Sliding Captioning", variant="primary", interactive=True
+                    )
+                with gr.Column(scale=2, min_width=50):
+                    submit_btn_fc = gr.Button(
+                        value="Fast Captioning", variant="primary", interactive=True
+                    )
+                with gr.Column(scale=2, min_width=50):
+                    submit_btn_pr = gr.Button(
+                        value="Prompt Re-captioning", variant="primary", interactive=True
+                    )
+        # Right column (scale 4): shared output box written by all three buttons.
+        with gr.Column(scale=4, min_width=200):
+            with gr.Row():
+                textbox_out = gr.Textbox(
+                    show_label=False, placeholder="Output", container=False
+                )
+    # Licence / terms notice below the main layout.
+    gr.Markdown(learn_more_markdown)
+    # Event wiring: the two captioning buttons take the video as input, the
+    # re-captioning button takes the text box; every result goes to textbox_out.
+    submit_btn_sc.click(generate_slidingcaptioning, [video],[textbox_out])
+    submit_btn_fc.click(generate_fastcaptioning, [video], [textbox_out])
+    submit_btn_pr.click(generate_promptrecaptioning, [textbox], [textbox_out])
+### Launch: pick the configuration from the runtime environment instead of
+### commenting one variant in and out by hand.
+# Hugging Face Spaces exposes the SPACE_ID environment variable inside the
+# container; there the app must start with the platform defaults, because a
+# custom port or a share tunnel is not what the Space expects.
+if os.environ.get("SPACE_ID"):
+    ### for huggingface launch
+    demo.launch()
+else:
+    ### for local launch
+    demo.launch(server_name="0.0.0.0",
+                server_port=28358,
+                share=True)

pyproject.toml ADDED Viewed

	@@ -0,0 +1,37 @@

+[build-system]
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "llava"
+version = "1.2.2.post1"
+description = "Towards GPT-4 like large language and visual assistant."
+readme = "README.md"
+requires-python = ">=3.8"
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "License :: OSI Approved :: Apache Software License",
+]
+dependencies = [
+    "torch==2.1.2", "torchvision==0.16.2",
+    "transformers==4.37.2", "tokenizers==0.15.1", "sentencepiece==0.1.99", "shortuuid",
+    "accelerate==0.21.0", "peft", "bitsandbytes",
+    "pydantic", "markdown2[all]", "numpy", "scikit-learn==1.2.2",
+    "gradio==4.16.0", "gradio_client==0.8.1", "openai", "spaces",
+    "requests", "httpx==0.24.0", "uvicorn", "fastapi", "decord",
+    "einops==0.6.1", "einops-exts==0.0.4", "timm==0.6.13",
+]
+[project.optional-dependencies]
+train = ["deepspeed==0.12.6", "ninja", "wandb"]
+build = ["build", "twine"]
+[project.urls]
+"Homepage" = "https://llava-vl.github.io"
+"Bug Tracker" = "https://github.com/haotian-liu/LLaVA/issues"
+[tool.setuptools.packages.find]
+exclude = ["assets*", "benchmark*", "docs", "dist*", "playground*", "scripts*", "tests*"]
+[tool.wheel]
+exclude = ["assets*", "benchmark*", "docs", "dist*", "playground*", "scripts*", "tests*"]

requirements.txt ADDED Viewed

	@@ -0,0 +1,29 @@

+torch==2.1.2
+torchvision==0.16.2
+transformers==4.37.2
+tokenizers==0.15.1
+sentencepiece==0.1.99
+accelerate==0.21.0
+peft
+bitsandbytes
+pydantic
+markdown2[all]
+numpy
+scikit-learn==1.2.2
+gradio==4.16.0
+gradio_client==0.8.1
+openai
+spaces
+requests
+httpx==0.24.0
+uvicorn
+fastapi
+decord
+einops==0.6.1
+einops-exts==0.0.4
+timm==0.6.13
+deepspeed==0.12.6
+ninja
+wandb
+build
+twine