Florence-2-SD3-Captioner-CPU

Paused

App Files Community

ChandimaPrabath committed on Jun 26

Commit

0c19cc1

•

1 Parent(s): 7a8d779

Update app.py

Browse files

Files changed (1) hide show

app.py +66 -41

app.py CHANGED Viewed

@@ -1,20 +1,18 @@
-import gradio as gr
 from transformers import AutoProcessor, AutoModelForCausalLM
-import spaces
 import re
 from PIL import Image
 # Install the necessary packages
-import subprocess
-subprocess.run('pip install flash-attn einops --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
-model = AutoModelForCausalLM.from_pretrained('gokaygokay/Florence-2-SD3-Captioner', trust_remote_code=True).eval()
 processor = AutoProcessor.from_pretrained('gokaygokay/Florence-2-SD3-Captioner', trust_remote_code=True)
-TITLE = "# [Florence-2 SD3 Long Captioner](https://huggingface.co/gokaygokay/Florence-2-SD3-Captioner/)"
-DESCRIPTION = "[Florence-2 Base](https://huggingface.co/microsoft/Florence-2-base-ft) fine-tuned on Long SD3 Prompt and Image pairs. Check above link for datasets that are used for fine-tuning."
 def modify_caption(caption: str) -> str:
     """
     Removes specific prefixes from captions if present, otherwise returns the original caption.
@@ -43,9 +41,62 @@ def modify_caption(caption: str) -> str:
     # If the caption was modified, return the modified version; otherwise, return the original
     return modified_caption if modified_caption != caption else caption
-@spaces.GPU
-def run_example(image):
-    image = Image.fromarray(image)
     task_prompt = "<DESCRIPTION>"
     prompt = task_prompt + "Describe this image in great detail."
@@ -62,35 +113,9 @@ def run_example(image):
     )
     generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
     parsed_answer = processor.post_process_generation(generated_text, task=task_prompt, image_size=(image.width, image.height))
-    return modify_caption(parsed_answer["<DESCRIPTION>"])
-css = """
-  #output {
-    height: 500px;
-    overflow: auto;
-    border: 1px solid #ccc;
-  }
-"""
-with gr.Blocks(css=css) as demo:
-    gr.Markdown(TITLE)
-    gr.Markdown(DESCRIPTION)
-    with gr.Tab(label="Florence-2 SD3 Prompts"):
-        with gr.Row():
-            with gr.Column():
-                input_img = gr.Image(label="Input Picture")
-                submit_btn = gr.Button(value="Submit")
-            with gr.Column():
-                output_text = gr.Textbox(label="Output Text")
-        gr.Examples(
-            [["image1.jpg"], ["image2.jpg"], ["image3.png"], ["image4.jpg"], ["image5.jpg"], ["image6.PNG"]],
-            inputs = [input_img],
-            outputs = [output_text],
-            fn=run_example,
-            label='Try captioning on below examples'
-            )
-        submit_btn.click(run_example, [input_img], [output_text])
-demo.launch(debug=True)

+from flask import Flask, request, jsonify, render_template_string
 from transformers import AutoProcessor, AutoModelForCausalLM
+import subprocess
 import re
 from PIL import Image
+import io
 # Install the necessary packages
+subprocess.run('pip install flash-attn einops flask', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
+app = Flask(__name__)
+model = AutoModelForCausalLM.from_pretrained('gokaygokay/Florence-2-SD3-Captioner', trust_remote_code=True).eval()
 processor = AutoProcessor.from_pretrained('gokaygokay/Florence-2-SD3-Captioner', trust_remote_code=True)
 def modify_caption(caption: str) -> str:
     """
     Removes specific prefixes from captions if present, otherwise returns the original caption.
     # If the caption was modified, return the modified version; otherwise, return the original
     return modified_caption if modified_caption != caption else caption
+@app.route('/')
+def index():
+    html = '''
+    <!DOCTYPE html>
+    <html>
+    <head>
+        <title>Florence-2 SD3 Long Captioner</title>
+        <style>
+            #output {
+                height: 500px;
+                overflow: auto;
+                border: 1px solid #ccc;
+            }
+        </style>
+    </head>
+    <body>
+        <h1>Florence-2 SD3 Long Captioner</h1>
+        <p>Florence-2 Base fine-tuned on Long SD3 Prompt and Image pairs. Check the Hugging Face link for datasets that are used for fine-tuning.</p>
+        <form id="uploadForm">
+            <label for="imageInput">Input Picture</label>
+            <input type="file" id="imageInput" name="image">
+            <button type="submit">Submit</button>
+        </form>
+        <div id="output">
+            <h3>Output Text</h3>
+            <p id="outputText"></p>
+        </div>
+        <script>
+            document.getElementById('uploadForm').onsubmit = async function(event) {
+                event.preventDefault();
+                const formData = new FormData();
+                const imageFile = document.getElementById('imageInput').files[0];
+                formData.append('image', imageFile);
+                const response = await fetch('/generate', {
+                    method: 'POST',
+                    body: formData
+                });
+                const data = await response.json();
+                document.getElementById('outputText').innerText = data.caption;
+            };
+        </script>
+    </body>
+    </html>
+    '''
+    return render_template_string(html)
+@app.route('/generate', methods=['POST'])
+def generate():
+    if 'image' not in request.files:
+        return jsonify({"error": "No image provided"}), 400
+    image_file = request.files['image']
+    image = Image.open(image_file.stream)
     task_prompt = "<DESCRIPTION>"
     prompt = task_prompt + "Describe this image in great detail."
     )
     generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
     parsed_answer = processor.post_process_generation(generated_text, task=task_prompt, image_size=(image.width, image.height))
+    caption = modify_caption(parsed_answer["<DESCRIPTION>"])
+    return jsonify({"caption": caption})
+if __name__ == '__main__':
+    app.run(debug=True)