Molmo-4bit

Running on Zero

App Files Community

zamal committed on Oct 12

Commit

4f1e215

•

1 Parent(s): e80f948

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -16

app.py CHANGED Viewed

@@ -18,14 +18,14 @@ processor = AutoProcessor.from_pretrained(repo_name, **arguments)
 # Define the function for image description
 @spaces.GPU  # This ensures the function gets GPU access when needed
-def describe_image(image):
     # Load the model inside the function and move it to GPU
     model = AutoModelForCausalLM.from_pretrained(repo_name, **arguments).to('cuda')
-    # Process the uploaded image
     inputs = processor.process(
         images=[image],
-        text="Describe this image in great detail without missing any piece of information"
     )
     # Move inputs to model device (GPU)
@@ -46,21 +46,27 @@ def describe_image(image):
 # Gradio interface
 def gradio_app():
-    # Define Gradio interface
-    image_input = gr.Image(type="pil", label="Upload Image")
-    output_text = gr.Textbox(label="Image Description", interactive=False)
-    # Create Gradio interface
-    interface = gr.Interface(
-        fn=describe_image,
-        inputs=image_input,
-        outputs=output_text,
-        title="Image Description App",
-        description="Upload an image and get a detailed description using the Molmo 7B model"
-    )
-    # Launch the interface
-    interface.launch()
 # Launch the Gradio app
 gradio_app()

 # Define the function for image description
 @spaces.GPU  # This ensures the function gets GPU access when needed
+def describe_image(image, question):
     # Load the model inside the function and move it to GPU
     model = AutoModelForCausalLM.from_pretrained(repo_name, **arguments).to('cuda')
+    # Process the uploaded image along with the user's question
     inputs = processor.process(
         images=[image],
+        text=question if question else "Describe this image in great detail without missing any piece of information"
     )
     # Move inputs to model device (GPU)
 # Gradio interface
 def gradio_app():
+    with gr.Blocks() as demo:
+        gr.Markdown("# Image Long Description with Molmo-7B 4 bit quantized\n### Upload an image and ask a question about it!")
+        with gr.Row():
+            image_input = gr.Image(type="pil", label="Upload an Image")
+            question_input = gr.Textbox(placeholder="Ask a question about the image (e.g., 'What is happening in this image?')", label="Question (Optional)")
+        output_text = gr.Textbox(label="Image Description", interactive=False)
+        # Submit button to generate the description
+        submit_btn = gr.Button("Generate Description")
+        # Callback to run when submit button is clicked
+        submit_btn.click(
+            fn=describe_image,
+            inputs=[image_input, question_input],
+            outputs=output_text
+        )
+    # Launch the Gradio interface
+    demo.launch()
 # Launch the Gradio app
 gradio_app()