ChandimaPrabath
committed on
Commit
•
30085b1
1
Parent(s):
71141aa
Update app.py
Browse files
app.py
CHANGED
@@ -2,16 +2,14 @@ import gradio as gr
|
|
2 |
from transformers import AutoProcessor, AutoModelForCausalLM
|
3 |
import spaces
|
4 |
import re
|
5 |
-
from PIL import Image
|
6 |
|
7 |
-
|
8 |
-
subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
|
9 |
|
10 |
-
model = AutoModelForCausalLM.from_pretrained('gokaygokay/Florence-2-SD3-Captioner', trust_remote_code=True).
|
11 |
|
12 |
processor = AutoProcessor.from_pretrained('gokaygokay/Florence-2-SD3-Captioner', trust_remote_code=True)
|
13 |
|
14 |
-
|
15 |
TITLE = "# [Florence-2 SD3 Long Captioner](https://huggingface.co/gokaygokay/Florence-2-SD3-Captioner/)"
|
16 |
DESCRIPTION = "[Florence-2 Base](https://huggingface.co/microsoft/Florence-2-base-ft) fine-tuned on Long SD3 Prompt and Image pairs. Check above link for datasets that are used for fine-tuning."
|
17 |
|
@@ -53,7 +51,7 @@ def run_example(image):
|
|
53 |
if image.mode != "RGB":
|
54 |
image = image.convert("RGB")
|
55 |
|
56 |
-
inputs = processor(text=prompt, images=image, return_tensors="pt")
|
57 |
generated_ids = model.generate(
|
58 |
input_ids=inputs["input_ids"],
|
59 |
pixel_values=inputs["pixel_values"],
|
@@ -64,7 +62,6 @@ def run_example(image):
|
|
64 |
parsed_answer = processor.post_process_generation(generated_text, task=task_prompt, image_size=(image.width, image.height))
|
65 |
return modify_caption(parsed_answer["<DESCRIPTION>"])
|
66 |
|
67 |
-
|
68 |
css = """
|
69 |
#output {
|
70 |
height: 500px;
|
@@ -94,4 +91,4 @@ with gr.Blocks(css=css) as demo:
|
|
94 |
|
95 |
submit_btn.click(run_example, [input_img], [output_text])
|
96 |
|
97 |
-
demo.launch(debug=True)
|
|
|
2 |
from transformers import AutoProcessor, AutoModelForCausalLM
|
3 |
import spaces
|
4 |
import re
|
5 |
+
from PIL import Image
|
6 |
|
7 |
+
# No need to install flash-attn since it's GPU-specific
|
|
|
8 |
|
9 |
+
model = AutoModelForCausalLM.from_pretrained('gokaygokay/Florence-2-SD3-Captioner', trust_remote_code=True).eval()
|
10 |
|
11 |
processor = AutoProcessor.from_pretrained('gokaygokay/Florence-2-SD3-Captioner', trust_remote_code=True)
|
12 |
|
|
|
13 |
TITLE = "# [Florence-2 SD3 Long Captioner](https://huggingface.co/gokaygokay/Florence-2-SD3-Captioner/)"
|
14 |
DESCRIPTION = "[Florence-2 Base](https://huggingface.co/microsoft/Florence-2-base-ft) fine-tuned on Long SD3 Prompt and Image pairs. Check above link for datasets that are used for fine-tuning."
|
15 |
|
|
|
51 |
if image.mode != "RGB":
|
52 |
image = image.convert("RGB")
|
53 |
|
54 |
+
inputs = processor(text=prompt, images=image, return_tensors="pt")
|
55 |
generated_ids = model.generate(
|
56 |
input_ids=inputs["input_ids"],
|
57 |
pixel_values=inputs["pixel_values"],
|
|
|
62 |
parsed_answer = processor.post_process_generation(generated_text, task=task_prompt, image_size=(image.width, image.height))
|
63 |
return modify_caption(parsed_answer["<DESCRIPTION>"])
|
64 |
|
|
|
65 |
css = """
|
66 |
#output {
|
67 |
height: 500px;
|
|
|
91 |
|
92 |
submit_btn.click(run_example, [input_img], [output_text])
|
93 |
|
94 |
+
demo.launch(debug=True)
|