Spaces:

Ravinandan
/

llama_3.1_with_vision

Runtime error

Ravinandan committed on Sep 4

Commit

b918dff

•

1 Parent(s): dcfbbe4

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
-from PIL import Image
 import gradio as gr
 # Load the model and tokenizer
@@ -15,7 +14,7 @@ tokenizer = AutoTokenizer.from_pretrained("qresearch/llama-3.1-8B-vision-378", u
 # Define the function to process the image and instruction
 def describe_image(image, instruction):
     description = model.answer_question(
-        image, instruction, tokenizer, max_new_tokens=128, do_sample=True, temperature=0.3
     )
     return description
@@ -24,12 +23,21 @@ interface = gr.Interface(
     fn=describe_image,
     inputs=[
         gr.Image(type="pil"),  # Input for the image
-        gr.Textbox(placeholder="Enter your instruction here...", label="Instruction")  # Input for the instruction
     ],
-    outputs="text",  # Output is text (the description)
-    title="LLaMA 3.1 with vision",
     description="Upload an image and enter an instruction to generate a description based on the provided instruction."
 )
 # Launch the Gradio app
-interface.launch()

 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import gradio as gr
 # Load the model and tokenizer
 # Define the function to process the image and instruction
 def describe_image(image, instruction):
     description = model.answer_question(
+        image, instruction, tokenizer, max_new_tokens=1000, do_sample=True, temperature=0.3
     )
     return description
     fn=describe_image,
     inputs=[
         gr.Image(type="pil"),  # Input for the image
+        gr.Textbox(
+            placeholder="Enter your instruction here...",
+            label="Instruction",
+            lines=10,  # Increase number of lines for instruction input
+            max_lines=20  # Maximum number of lines for scrolling
+        )
     ],
+    outputs=gr.Textbox(
+        label="Description",
+        lines=10,  # Increase number of lines for output
+        max_lines=30  # Maximum number of lines for scrolling
+    ),
+    title="LLaMA 3.1 with Vision",
     description="Upload an image and enter an instruction to generate a description based on the provided instruction."
 )
 # Launch the Gradio app
+interface.launch()