apolinario committed
Commit ad1fd8e
1 Parent(s): ca37dd4

VQGAN attempt

Files changed (3)
  1. app.py +34 -17
  2. flavors.jpg +0 -0
  3. requirements.txt +2 -1
app.py CHANGED
@@ -32,7 +32,7 @@ import subprocess
 import imageio
 from PIL import ImageFile, Image
 import time
-
+import base64
 
 import hashlib
 from PIL.PngImagePlugin import PngImageFile, PngInfo
@@ -41,6 +41,7 @@ import urllib.request
 from random import randint
 from pathvalidate import sanitize_filename
 from huggingface_hub import hf_hub_download
+import shortuuid
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 print("Using device:", device)
@@ -75,7 +76,7 @@ perceptor = (
     .requires_grad_(False)
     .to(device)
 )
-def run_all(user_input,num_steps, template, width,height):
+def run_all(user_input, num_steps, flavor, markdown, template, width,height):
     import random
     #if uploaded_file is not None:
     #uploaded_folder = f"{DefaultPaths.root_path}/uploaded"
@@ -89,8 +90,7 @@ def run_all(user_input,num_steps, template, width,height):
     #pass
     #else:
     image_path = None
-    flavor = 'cumin'
-
+    url = shortuuid.uuid()
     args2 = argparse.Namespace(
         prompt=user_input,
         seed=int(random.randint(0, 2147483647)),
@@ -103,7 +103,7 @@ def run_all(user_input,num_steps, template, width,height):
         template=template,
         vqgan_model='ImageNet 16384',
         seed_image=image_path,
-        image_file="progress.png",
+        image_file=f"{url}.png",
         #frame_dir=intermediary_folder,
     )
     if args2.seed is not None:
@@ -1299,6 +1299,7 @@ def run_all(user_input,num_steps, template, width,height):
         z_orig = z.tensor.clone()
         z.requires_grad_(True)
         # opt = optim.AdamW(z.parameters(), lr=args.mse_step_size, weight_decay=0.00000000)
+        print("Step size inside:", args.step_size)
         if self.normal_flip_optim == True:
             if randint(1, 2) == 1:
                 opt = torch.optim.AdamW(
@@ -1430,8 +1431,7 @@ def run_all(user_input,num_steps, template, width,height):
 
         sys.stdout.write("Iteration {}".format(i) + "\n")
         sys.stdout.flush()
-
-        if i % args2.update == 0:
+        if i % (args2.iterations-2) == 0:
             self.checkin(i, lossAll, x)
 
         loss = sum(lossAll)
@@ -1493,6 +1493,8 @@ def run_all(user_input,num_steps, template, width,height):
     def run(self, x):
         j = 0
         try:
+            print("Step size: ", args.step_size)
+            print("Step MSE size: ", args.mse_step_size)
             before_start_time = time.perf_counter()
             total_steps = int(args.max_iterations + args.mse_end) - 1
             for _ in range(total_steps):
@@ -1516,9 +1518,9 @@ def run_all(user_input,num_steps, template, width,height):
                 import shutil
                 import os
 
-                image_data = Image.open(args2.image_file)
-                print(image_data)
-                return(image_data)
+                #image_data = Image.open(args2.image_file)
+                #os.remove(args2.image_file)
+                #return(image_data)
 
         except KeyboardInterrupt:
             pass
@@ -2289,14 +2291,16 @@ def run_all(user_input,num_steps, template, width,height):
         is_gumbel=is_gumbel,
         gen_seed=gen_seed,
     )
-
     mh = ModelHost(args)
     x = 0
 
     #for x in range(batch_size):
     mh.setup_model(x)
-    last_iter = mh.run(x)
-    return(last_iter)
+    mh.run(x)
+    image_data = Image.open(args2.image_file)
+    os.remove(args2.image_file)
+    return(image_data)
+    #return(last_iter)
     #x = x + 1
 
     if zoom:
@@ -2322,18 +2326,31 @@ def run_all(user_input,num_steps, template, width,height):
 
 ##################### START GRADIO HERE ############################
 image = gr.outputs.Image(type="pil", label="Your result")
+def cvt_2_base64(file_name):
+    with open(file_name, "rb") as image_file:
+        data = base64.b64encode(image_file.read())
+        return data.decode('utf-8')
+base64image = "data:image/jpg;base64,"+cvt_2_base64('flavors.jpg')
+markdown = gr.Markdown("<img src='"+base64image+"' />")
+def test(raw_input):
+    pass
+setattr(markdown, "requires_permissions", False)
+setattr(markdown, "label", "Flavors")
+setattr(markdown, "preprocess", test)
 iface = gr.Interface(
     fn=run_all,
     inputs=[
         gr.inputs.Textbox(label="Prompt - try adding increments to your prompt such as 'oil on canvas', 'a painting', 'a book cover'",default="chalk pastel drawing of a dog wearing a funny hat"),
-        gr.inputs.Slider(label="Steps - more steps can increase quality but will take longer to generate",default=300,maximum=300,minimum=1,step=1),
-        gr.inputs.Dropdown(label="Style",choices=["none","Balanced","Detailed","Consistent Creativity","Realistic","Smooth","Subtle MSE","Hyper Fast Results"]),
+        gr.inputs.Slider(label="Steps - more steps can increase quality but will take longer to generate",default=50,maximum=250,minimum=1,step=1),
+        gr.inputs.Dropdown(label="Flavor",choices=["ginger", "cumin", "holywater", "zynth", "wyvern", "aaron", "moth", "juu", "custom"]),
+        markdown,
+        gr.inputs.Dropdown(label="Style",choices=["Default","Balanced","Detailed","Consistent Creativity","Realistic","Smooth","Subtle MSE","Hyper Fast Results"],default="Hyper Fast Results"),
         gr.inputs.Radio(label="Width", choices=[32,64,128,256,512],default=256),
         gr.inputs.Radio(label="Height", choices=[32,64,128,256,512],default=256),
     ],
     outputs=image,
-    title="Generate images from text with VQGAN+CLIP",
+    title="Generate images from text with VQGAN+CLIP (Hypertron v2)",
     #description="<div>By typing a prompt and pressing submit you can generate images based on this prompt. <a href='https://github.com/CompVis/latent-diffusion' target='_blank'>Latent Diffusion</a> is a text-to-image model created by <a href='https://github.com/CompVis' target='_blank'>CompVis</a>, trained on the <a href='https://laion.ai/laion-400-open-dataset/'>LAION-400M dataset.</a><br>This UI to the model was assembled by <a style='color: rgb(245, 158, 11);font-weight:bold' href='https://twitter.com/multimodalart' target='_blank'>@multimodalart</a></div>",
     #article="<h4 style='font-size: 110%;margin-top:.5em'>Biases acknowledgment</h4><div>Despite how impressive being able to turn text into image is, beware to the fact that this model may output content that reinforces or exarcbates societal biases. According to the <a href='https://arxiv.org/abs/2112.10752' target='_blank'>Latent Diffusion paper</a>:<i> \"Deep learning modules tend to reproduce or exacerbate biases that are already present in the data\"</i>. The model was trained on an unfiltered version the LAION-400M dataset, which scrapped non-curated image-text-pairs from the internet (the exception being the the removal of illegal content) and is meant to be used for research purposes, such as this one. <a href='https://laion.ai/laion-400-open-dataset/' target='_blank'>You can read more on LAION's website</a></div><h4 style='font-size: 110%;margin-top:1em'>Who owns the images produced by this demo?</h4><div>Definetly not me! Probably you do. I say probably because the Copyright discussion about AI generated art is ongoing. So <a href='https://www.theverge.com/2022/2/21/22944335/us-copyright-office-reject-ai-generated-art-recent-entrance-to-paradise' target='_blank'>it may be the case that everything produced here falls automatically into the public domain</a>. But in any case it is either yours or is in the public domain.</div>"
 )
-iface.launch(enable_queue=True)
+iface.launch()
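The substantive change in app.py is the output-file handling: instead of every request writing to a shared progress.png, each call to run_all now derives a unique name from shortuuid.uuid() and deletes the file after reading it back, so concurrent Gradio users cannot clobber one another's results. A minimal sketch of the same pattern, not part of the commit (render_to_unique_file and the Image.new call are stand-ins for the real VQGAN+CLIP loop):

    import os
    import shortuuid
    from PIL import Image

    def render_to_unique_file(width=256, height=256):
        # Collision-resistant output name per request, as in the commit.
        out_path = f"{shortuuid.uuid()}.png"
        # Placeholder for the generation loop, which saves its progress here.
        Image.new("RGB", (width, height)).save(out_path)
        # Read the result back, then remove the temp file so concurrent
        # requests never see each other's output.
        image_data = Image.open(out_path)
        image_data.load()  # force PIL's lazy read before the file is unlinked
        os.remove(out_path)
        return image_data

One caveat: Image.open is lazy, so the commit's open-then-remove-then-return sequence relies on POSIX semantics (an unlinked file stays readable through its open handle); the explicit load() above is the portable form.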
flavors.jpg ADDED
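The new flavors.jpg exists only to be inlined: the Gradio code above base64-encodes it into a data: URI inside a gr.Markdown element, so the flavor reference image ships with the interface rather than needing a static file route. A standalone sketch of that encoding step, tidied but otherwise just the commit's own cvt_2_base64 (strictly the MIME type is image/jpeg, though browsers also accept the commit's image/jpg):

    import base64

    def cvt_2_base64(file_name: str) -> str:
        # Read the image bytes and base64-encode them for use in a data URI.
        with open(file_name, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")

    # The entire JPEG payload lives inside the HTML attribute.
    img_tag = "<img src='data:image/jpeg;base64," + cvt_2_base64("flavors.jpg") + "' />"

The setattr calls on the Markdown component (requires_permissions, label, a no-op preprocess) appear to be a workaround for placing an output-style Markdown block in the inputs list; the matching markdown parameter added to run_all is presumably ignored.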
requirements.txt CHANGED
@@ -25,4 +25,5 @@ pathvalidate
 stegano
 imgtag
 timm
-python-xmp-toolkit
\ No newline at end of file
+python-xmp-toolkit
+shortuuid