Deadmon committed on
Commit e10f0a4
1 Parent(s): 75a7f7d

Update app.py

Files changed (1)
  1. app.py +27 -20
app.py CHANGED
@@ -15,6 +15,9 @@ def generate(input_ids,
              cfg_weight: float = 5,
              image_token_num_per_image: int = 576,
              patch_size: int = 16):
+    # Clear CUDA cache before generating
+    torch.cuda.empty_cache()
+
     tokens = torch.zeros((parallel_size * 2, len(input_ids)), dtype=torch.int).to(cuda_device)
     for i in range(parallel_size * 2):
         tokens[i, :] = input_ids
@@ -55,32 +58,36 @@ def unpack(dec, width, height, parallel_size=1):
     return visual_img

 @torch.inference_mode()
-@spaces.GPU  # Decorate the function for ZeroGPU compatibility
+@spaces.GPU(duration=120)  # Specify a duration to avoid timeout
 def generate_image(prompt,
                    width,
                    height,
                    guidance,
                    seed):
-    if seed > -1:
-        generator = torch.Generator('cpu').manual_seed(seed)
-    else:
-        generator = None
-    messages = [{'role': 'User', 'content': prompt},
-                {'role': 'Assistant', 'content': ''}]
-    text = processor.apply_sft_template_for_multi_turn_prompts(conversations=messages,
-                                                               sft_format=processor.sft_format,
-                                                               system_prompt='')
-    text = text + processor.image_start_tag
-    input_ids = torch.LongTensor(processor.tokenizer.encode(text))
-    output, patches = generate(input_ids,
-                               width // 16 * 16,
-                               height // 16 * 16,
-                               cfg_weight=guidance)
-    images = unpack(patches,
-                    width // 16 * 16,
-                    height // 16 * 16)
+    # Clear CUDA cache and avoid tracking gradients
+    torch.cuda.empty_cache()
+
+    with torch.no_grad():
+        if seed > -1:
+            generator = torch.Generator('cpu').manual_seed(seed)
+        else:
+            generator = None
+        messages = [{'role': 'User', 'content': prompt},
+                    {'role': 'Assistant', 'content': ''}]
+        text = processor.apply_sft_template_for_multi_turn_prompts(conversations=messages,
+                                                                   sft_format=processor.sft_format,
+                                                                   system_prompt='')
+        text = text + processor.image_start_tag
+        input_ids = torch.LongTensor(processor.tokenizer.encode(text))
+        output, patches = generate(input_ids,
+                                   width // 16 * 16,
+                                   height // 16 * 16,
+                                   cfg_weight=guidance)
+        images = unpack(patches,
+                        width // 16 * 16,
+                        height // 16 * 16)

-    return Image.fromarray(images[0]), seed, ''
+        return Image.fromarray(images[0]), seed, ''

 with gr.Blocks() as demo:
     with gr.Row():
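For context, below is a minimal, self-contained sketch of the pattern this commit applies to generate_image: a ZeroGPU-decorated Gradio handler that requests a longer GPU window, clears the CUDA cache before running, and keeps the forward pass under torch.no_grad(). The run_model function, its textbox UI, and the placeholder body are illustrative assumptions, not code from app.py.

import gradio as gr
import spaces   # HuggingFace ZeroGPU helper package used by the Space
import torch


@torch.inference_mode()
@spaces.GPU(duration=120)  # ask ZeroGPU for up to 120 s per call instead of the default window
def run_model(prompt):
    # Release cached allocations left over from a previous request
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    # Keep the whole forward pass out of autograd, mirroring the commit
    with torch.no_grad():
        # Placeholder for the real Janus text-to-image pipeline
        return f"generated output for: {prompt}"


with gr.Blocks() as demo:
    prompt_box = gr.Textbox(label="Prompt")
    output_box = gr.Textbox(label="Output")
    gr.Button("Generate").click(fn=run_model, inputs=prompt_box, outputs=output_box)

demo.launch()

The duration argument matters on ZeroGPU Spaces because the default allocation window can expire before image generation finishes, which is what the "avoid timeout" comment in the diff refers to.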