Spaces:

thomasgauthier
/

HowJanusSeesItself

Running on Zero

File size: 2,331 Bytes

bdf9962
 
 
a5d57e1
 
 
 
 
 
fc91aa0
 
bdf9962
fc91aa0
bdf9962
cb2237b
bdf9962
bd0e8c7
bdf9962
 
 
 
 
 
a5d57e1
bdf9962
 
 
edb3f33
 
 
 
 
 
 
 
 
 
bdf9962
 
 
 
 
 
edb3f33
a5d57e1
 
 
edb3f33
bdf9962

import gradio as gr
from PIL import Image

# Paths of the example image pairs; each entry is an (inN, outN) pair.
_EXAMPLE_FILES = (
    ("examples/in0.jpg", "examples/out0.webp"),
    ("examples/in1.webp", "examples/out1.png"),
    ("examples/in2.jpg", "examples/out2.png"),
    ("examples/in3.jpg", "examples/out3.png"),
)

# One row per pair, each row being [in-image, out-image] as PIL images.
# The files are opened at import time, in the order listed above.
examples = [[Image.open(path) for path in pair] for pair in _EXAMPLE_FILES]

def create_gradio_interface(process_and_generate):
    """Build the Gradio Blocks UI for the demo and return it.

    The returned app is only constructed here, not launched.

    Args:
        process_and_generate: Callable invoked when the "Generate" button is
            clicked. It is called positionally with
            ``(input_image, prompt, num_images, cfg_weight)`` taken from the
            UI components, and whatever it returns is sent to the
            "Generated Images" gallery.

    Returns:
        The ``gr.Blocks`` instance containing the whole interface.
    """

    def gradio_process_and_generate(input_image, prompt, num_images, cfg_weight):
        # Thin pass-through so the click handler is a plain local function
        # with exactly the four parameters the UI supplies.
        return process_and_generate(input_image, prompt, num_images, cfg_weight)

    explanation = """[Janus 1.3B](https://huggingface.co/deepseek-ai/Janus-1.3B) uses different visual encoders for understanding and generation.

<img src="https://huggingface.co/spaces/thomasgauthier/HowJanusSeesItself/raw/main/images/janus_architecture.svg" alt="Janus Model Architecture">

Here, by feeding the model an image and then asking it to generate that same image, we visualize the model's ability to translate input (understanding) embedding space to generative embedding space."""

    with gr.Blocks() as demo:
        gr.Markdown("# How Janus-1.3B sees itself")

        # Hidden image component: it is never wired to the click handler and
        # exists only so gr.Examples below has a second component to bind the
        # second image of each example row to.
        dummy = gr.Image(type="filepath", label="Generated Image", visible=False)
        with gr.Row():
            input_image = gr.Image(type="filepath", label="Input Image")
            output_images = gr.Gallery(label="Generated Images", columns=2, rows=2)

        with gr.Row():
            # Controls on the left, explanation on the right (1:2 width ratio).
            with gr.Column(scale=1):
                prompt = gr.Textbox(label="Prompt", value="Exactly what is shown in the image.")
                num_images = gr.Slider(minimum=1, maximum=12, value=12, step=1, label="Number of Images to Generate")
                cfg_weight = gr.Slider(minimum=1, maximum=10, value=5, step=0.1, label="CFG Weight")
                generate_btn = gr.Button("Generate", variant="primary", size="lg")
            with gr.Column(scale=2):
                gr.Markdown(explanation)

        generate_btn.click(
            fn=gradio_process_and_generate,
            inputs=[input_image, prompt, num_images, cfg_weight],
            outputs=output_images
        )

        # Each example row is [input image, output image]; the second image
        # is routed to the hidden `dummy` component.
        gr.Examples(
            examples=examples,
            inputs=[input_image, dummy]
        )

    return demo