Spaces:

shikunl
/

prismer

Sleeping

App Files Community

shikunl committed on Mar 12, 2023

Commit

818a4f8

•

1 Parent(s): 7617596

Reset

Browse files

Files changed (3) hide show

app.py +13 -7
app_caption.py +3 -13
prismer_model.py +8 -32

app.py CHANGED Viewed

@@ -11,25 +11,31 @@ import gradio as gr
 if os.getenv('SYSTEM') == 'spaces':
     with open('patch') as f:
         subprocess.run('patch -p1'.split(), cwd='prismer', stdin=f)
-    shutil.copytree('prismer/helpers/images',
-                    'prismer/images',
-                    dirs_exist_ok=True)
 from app_caption import create_demo as create_demo_caption
 from prismer_model import build_deformable_conv, download_models
 download_models()
 build_deformable_conv()
-DESCRIPTION = '# [Prismer](https://github.com/nvlabs/prismer)'
 if (SPACE_ID := os.getenv('SPACE_ID')) is not None:
-    DESCRIPTION += f'<p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings. <a href="https://huggingface.co/spaces/{SPACE_ID}?duplicate=true"><img style="display: inline; margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" /></a></p>'
 with gr.Blocks(css='style.css') as demo:
-    gr.Markdown(DESCRIPTION)
     with gr.Tabs():
-        with gr.TabItem('Caption'):
             create_demo_caption()
 demo.queue(api_open=False).launch()

 if os.getenv('SYSTEM') == 'spaces':
     with open('patch') as f:
         subprocess.run('patch -p1'.split(), cwd='prismer', stdin=f)
+    shutil.copytree('prismer/helpers/images', 'prismer/images', dirs_exist_ok=True)
 from app_caption import create_demo as create_demo_caption
 from prismer_model import build_deformable_conv, download_models
+# Prepare model checkpoints
 download_models()
 build_deformable_conv()
+# Demo file here
+description = """
+# Prismer
+The official demo for **Prismer: A Vision-Language Model with An Ensemble of Experts**.
+Please refer to our [project page](https://shikun.io/projects/prismer) or [github](https://github.com/NVlabs/prismer) for more details.
+"""
 if (SPACE_ID := os.getenv('SPACE_ID')) is not None:
+    description += f'For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings. <a href="https://huggingface.co/spaces/{SPACE_ID}?duplicate=true"><img style="display: inline; margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" /></a>'
 with gr.Blocks(css='style.css') as demo:
+    gr.Markdown(description)
     with gr.Tabs():
+        with gr.TabItem('Zero-shot Image Captioning'):
             create_demo_caption()
 demo.queue(api_open=False).launch()

app_caption.py CHANGED Viewed

@@ -15,10 +15,8 @@ def create_demo():
     with gr.Row():
         with gr.Column():
-            image = gr.Image(label='Input', type='filepath')
-            model_name = gr.Dropdown(label='Model',
-                                     choices=['prismer_base'],
-                                     value='prismer_base')
             run_button = gr.Button('Run')
         with gr.Column(scale=1.5):
             caption = gr.Text(label='Caption')
@@ -32,15 +30,7 @@ def create_demo():
                 ocr = gr.Image(label='OCR Detection')
     inputs = [image, model_name]
-    outputs = [
-        caption,
-        depth,
-        edge,
-        normals,
-        segmentation,
-        object_detection,
-        ocr,
-    ]
     paths = sorted(pathlib.Path('prismer/images').glob('*'))
     examples = [[path.as_posix(), 'prismer_base'] for path in paths]

     with gr.Row():
         with gr.Column():
+            image = gr.Image(label='Input Image', type='filepath')
+            model_name = gr.Dropdown(label='Model Size', choices=['prismer_base'], value='prismer_base')
             run_button = gr.Button('Run')
         with gr.Column(scale=1.5):
             caption = gr.Text(label='Caption')
                 ocr = gr.Image(label='OCR Detection')
     inputs = [image, model_name]
+    outputs = [caption, depth, edge, normals, segmentation, object_detection, ocr]
     paths = sorted(pathlib.Path('prismer/images').glob('*'))
     examples = [[path.as_posix(), 'prismer_base'] for path in paths]

prismer_model.py CHANGED Viewed

@@ -20,32 +20,22 @@ from model.prismer_caption import PrismerCaption
 def download_models() -> None:
     if not pathlib.Path('prismer/experts/expert_weights/').exists():
-        subprocess.run(shlex.split(
-            'python download_checkpoints.py --download_experts=True'),
-                       cwd='prismer')
     model_names = [
-        'vqa_prismer_base',
-        'vqa_prismer_large',
-        'vqa_prismerz_base',
-        'vqa_prismerz_large',
-        'caption_prismerz_base',
-        'caption_prismerz_large',
         'caption_prismer_base',
         'caption_prismer_large',
     ]
     for model_name in model_names:
         if pathlib.Path(f'prismer/logging/{model_name}').exists():
             continue
-        subprocess.run(shlex.split(
-            f'python download_checkpoints.py --download_models={model_name}'),
-                       cwd='prismer')
 def build_deformable_conv() -> None:
-    subprocess.run(
-        shlex.split('sh make.sh'),
-        cwd=
-        'prismer/experts/segmentation/mask2former/modeling/pixel_decoder/ops')
 def run_experts(image_path: str) -> tuple[str | None, ...]:
@@ -56,14 +46,7 @@ def run_experts(image_path: str) -> tuple[str | None, ...]:
     out_path = image_dir / 'image.jpg'
     cv2.imwrite(out_path.as_posix(), cv2.imread(image_path))
-    expert_names = [
-        'depth',
-        'edge',
-        'normal',
-        'objdet',
-        'ocrdet',
-        'segmentation',
-    ]
     for expert_name in expert_names:
         env = os.environ.copy()
         if 'PYTHONPATH' in env:
@@ -76,14 +59,7 @@ def run_experts(image_path: str) -> tuple[str | None, ...]:
             env=env,
             check=True)
-    keys = [
-        'depth',
-        'edge',
-        'normal',
-        'seg_coco',
-        'obj_detection',
-        'ocr_detection',
-    ]
     results = [
         pathlib.Path('prismer/helpers/labels') / key /
         'helpers/images/image.png' for key in keys

 def download_models() -> None:
     if not pathlib.Path('prismer/experts/expert_weights/').exists():
+        subprocess.run(shlex.split('python download_checkpoints.py --download_experts=True'), cwd='prismer')
     model_names = [
+        # 'vqa_prismer_base',
+        # 'vqa_prismer_large',
         'caption_prismer_base',
         'caption_prismer_large',
     ]
     for model_name in model_names:
         if pathlib.Path(f'prismer/logging/{model_name}').exists():
             continue
+        subprocess.run(shlex.split(f'python download_checkpoints.py --download_models={model_name}'), cwd='prismer')
 def build_deformable_conv() -> None:
+    subprocess.run(shlex.split('sh make.sh'), cwd='prismer/experts/segmentation/mask2former/modeling/pixel_decoder/ops')
 def run_experts(image_path: str) -> tuple[str | None, ...]:
     out_path = image_dir / 'image.jpg'
     cv2.imwrite(out_path.as_posix(), cv2.imread(image_path))
+    expert_names = ['depth', 'edge', 'normal', 'objdet', 'ocrdet', 'segmentation']
     for expert_name in expert_names:
         env = os.environ.copy()
         if 'PYTHONPATH' in env:
             env=env,
             check=True)
+    keys = ['depth', 'edge', 'normal', 'seg_coco', 'obj_detection', 'ocr_detection']
     results = [
         pathlib.Path('prismer/helpers/labels') / key /
         'helpers/images/image.png' for key in keys