Spaces: Running on Zero
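The app.py below is a minimal Gradio demo for a ZeroGPU Space: it installs flash-attn at startup, loads OpenGVLab/InternVL2-8B with trust_remote_code, and serves it through a visual-question-answering pipeline.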
import spaces  # ZeroGPU: import `spaces` early so it can patch CUDA initialization before torch is used
import torch
import gradio as gr
from transformers import AutoModel, pipeline, AutoTokenizer
import subprocess
# from issue: https://discuss.huggingface.co/t/how-to-install-flash-attention-on-hf-gradio-space/70698/2
# InternVL2 needs flash_attn. It is installed at startup because the Space image
# does not ship it; FLASH_ATTENTION_SKIP_CUDA_BUILD skips compiling the CUDA
# extension, which would fail while the app boots without a GPU attached.
subprocess.run(
    "pip install flash-attn --no-build-isolation",
    env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    shell=True,
)
try:
    model_name = "OpenGVLab/InternVL2-8B"
    # model: <class 'transformers_modules.OpenGVLab.InternVL2-8B.0e6d592d957d9739b6df0f4b90be4cb0826756b9.modeling_internvl_chat.InternVLChatModel'>
    model = (
        AutoModel.from_pretrained(
            model_name,
            torch_dtype=torch.bfloat16,
            # low_cpu_mem_usage=True,
            trust_remote_code=True,
        )
        .cuda()  # on ZeroGPU, `spaces` intercepts this; the GPU is attached lazily at call time
        .eval()
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    # pipeline: <class 'transformers.pipelines.visual_question_answering.VisualQuestionAnsweringPipeline'>
    inference = pipeline(
        task="visual-question-answering", model=model, tokenizer=tokenizer
    )
except Exception as error:
    raise gr.Error("Startup failed: " + str(error), duration=30)
# On a ZeroGPU Space, functions that touch the GPU must be decorated with
# @spaces.GPU so a device is attached for the duration of the call.
@spaces.GPU
def predict(input_img, questions):
    try:
        gr.Info("pipeline: " + str(type(inference)))
        gr.Info("model: " + str(type(model)))
        predictions = inference(question=questions, image=input_img)
        return str(predictions)
    except Exception as e:
        error_message = "Prediction failed: " + str(e)
        raise gr.Error(error_message, duration=25)
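# A hedged alternative in case the generic VQA pipeline rejects the
# trust_remote_code model: the InternVL2 model card documents a chat() method
# on the model itself, called as model.chat(tokenizer, pixel_values, question,
# generation_config). The single-tile 448x448 preprocessing below is a
# simplified assumption (the model card uses a dynamic multi-tile transform),
# and it assumes torchvision is available in the Space.
import torchvision.transforms as T

_internvl_transform = T.Compose([
    T.Resize((448, 448)),
    T.ToTensor(),
    T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

@spaces.GPU
def predict_via_chat(input_img, question):
    # PIL image -> (1, 3, 448, 448) bfloat16 tensor on the GPU
    pixel_values = (
        _internvl_transform(input_img.convert("RGB"))
        .unsqueeze(0)
        .to(torch.bfloat16)
        .cuda()
    )
    return model.chat(
        tokenizer, pixel_values, "<image>\n" + question, dict(max_new_tokens=256)
    )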
gradio_app = gr.Interface(
    predict,
    inputs=[
        gr.Image(label="Select an Image", sources=["upload", "webcam"], type="pil"),
        "text",
    ],
    outputs="text",
    title="Ask Me Anything",
)
if __name__ == "__main__":
    gradio_app.launch(show_error=True, debug=True)
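Note: besides gradio, spaces, torch, and transformers, the InternVL2 remote code typically also needs timm and einops in the Space's requirements.txt (plus torchvision for the chat-based sketch above); check the model card for the exact pins.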