ChandimaPrabath committed
Commit 30085b1
1 Parent(s): 71141aa

Update app.py

Files changed (1)
  1. app.py +5 -8
app.py CHANGED
@@ -2,16 +2,14 @@ import gradio as gr
 from transformers import AutoProcessor, AutoModelForCausalLM
 import spaces
 import re
-from PIL import Image
+from PIL import Image
 
-import subprocess
-subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
+# No need to install flash-attn since it's GPU-specific
 
-model = AutoModelForCausalLM.from_pretrained('gokaygokay/Florence-2-SD3-Captioner', trust_remote_code=True).to("cuda").eval()
+model = AutoModelForCausalLM.from_pretrained('gokaygokay/Florence-2-SD3-Captioner', trust_remote_code=True).eval()
 
 processor = AutoProcessor.from_pretrained('gokaygokay/Florence-2-SD3-Captioner', trust_remote_code=True)
 
-
 TITLE = "# [Florence-2 SD3 Long Captioner](https://huggingface.co/gokaygokay/Florence-2-SD3-Captioner/)"
 DESCRIPTION = "[Florence-2 Base](https://huggingface.co/microsoft/Florence-2-base-ft) fine-tuned on Long SD3 Prompt and Image pairs. Check above link for datasets that are used for fine-tuning."
 
@@ -53,7 +51,7 @@ def run_example(image):
     if image.mode != "RGB":
         image = image.convert("RGB")
 
-    inputs = processor(text=prompt, images=image, return_tensors="pt").to("cuda")
+    inputs = processor(text=prompt, images=image, return_tensors="pt")
     generated_ids = model.generate(
         input_ids=inputs["input_ids"],
         pixel_values=inputs["pixel_values"],
@@ -64,7 +62,6 @@ def run_example(image):
     parsed_answer = processor.post_process_generation(generated_text, task=task_prompt, image_size=(image.width, image.height))
     return modify_caption(parsed_answer["<DESCRIPTION>"])
 
-
 css = """
 #output {
     height: 500px;
@@ -94,4 +91,4 @@ with gr.Blocks(css=css) as demo:
 
     submit_btn.click(run_example, [input_img], [output_text])
 
-demo.launch(debug=True)
+demo.launch(debug=True)
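
The net effect of the commit is to strip the GPU-only pieces (the flash-attn install and the hard-coded .to("cuda") calls) so the Space can run on CPU hardware. For comparison, a minimal device-agnostic sketch (not part of this commit; it assumes torch is importable in the Space) would pick the device at runtime instead of hard-coding either CPU or CUDA:

import torch
from transformers import AutoProcessor, AutoModelForCausalLM

# Hypothetical variant: use the GPU when one is present, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

model = AutoModelForCausalLM.from_pretrained(
    'gokaygokay/Florence-2-SD3-Captioner', trust_remote_code=True
).to(device).eval()
processor = AutoProcessor.from_pretrained(
    'gokaygokay/Florence-2-SD3-Captioner', trust_remote_code=True
)

# Inside run_example, the input tensors would then follow the model's device:
# inputs = processor(text=prompt, images=image, return_tensors="pt").to(device)

This keeps the commit's CPU-only behaviour on CPU hardware while still using CUDA if the Space is later moved to a GPU.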