sathvikparasa20 committed
Commit: 5c62fee
Parent(s): 1b7de6b

Delete app.py
app.py DELETED
@@ -1,39 +0,0 @@
-from transformers import ViltProcessor, ViltForQuestionAnswering
-import torch
-import gradio as gr
-
-# Load the model and processor
-processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
-model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
-
-def answer_question(image, text):
-    # Convert the uploaded image to PIL format
-    image = Image.fromarray(image.astype('uint8'), 'RGB')
-
-    # Process the image and text
-    encoding = processor(images=image, text=text, return_tensors="pt", padding=True)
-
-    # Forward pass
-    with torch.no_grad():
-        outputs = model(**encoding)
-
-    logits = outputs.logits
-    idx = logits.argmax(-1).item()
-    predicted_answer = model.config.id2label[idx]
-
-    # Return the predicted answer
-    return predicted_answer
-
-# Define Gradio inputs and outputs
-image = gr.Image(type="numpy", label="Upload Image")
-question = gr.Textbox(lines=2, label="Question")
-answer = gr.Textbox(label="Predicted Answer")
-
-# Create Gradio Interface
-gr.Interface(
-    fn=answer_question,
-    inputs=[image, question],
-    outputs=answer,
-    title="Image Based Visual Question Answering",
-    description="This is a demonstration of ViLT (Vision and Language Transformer) using Gradio, which has been fine-tuned on VQAv2 to answer questions based on images. To get a predicted answer, please provide an image and type in your question, then press the submit button."
-).launch(share=True)