dragynir committed
Commit 903b52c
1 Parent(s): 9c77b03
Files changed (4)
  1. README.md +3 -3
  2. app.py +17 -19
  3. src/inference_no_condition.py +3 -3
  4. src/pipeline.py +7 -8
README.md CHANGED
@@ -28,9 +28,9 @@ a handsome man relaxing in a chair, shirt widely unbuttoned, eyes closed, close
 - [x] adaptive resize for image sizes
 - [x] fix the mask loading
 - [x] pick proper prompts for the examples, add a negative prompt (https://blog.segmind.com/prompt-guide-for-stable-diffusion-xl-crafting-textual-descriptions-for-image-generation/)
-- [ ] figure out how the device is set in the pipeline
-- [ ] remove the unneeded weights
-- [ ] small refactor - dark theme - worth a try
+- [x] remove the unneeded weights
+- [x] small refactor - dark theme - worth a try
 - [ ] check the links
 - [ ] push the version to main
+- [ ] figure out how the device is set in the pipeline
 - [ ] set up the launch on Hugging Face Spaces
app.py CHANGED
@@ -41,12 +41,10 @@ def process(
         output.generated_image,
         output.control_mask,
     ]
-    # return [input_image, input_image]


 def read_content(file_path: str) -> str:
-    """read the content of target file
-    """
+    """Read the content of target file."""
     with open(file_path, 'r', encoding='utf-8') as f:
         content = f.read()

@@ -63,36 +61,36 @@ examples = [[image, prompt[0], prompt[1]] for image, prompt in zip(image_list, p
 block = gr.Blocks().queue()
 with block:
     with gr.Row():
-        gr.HTML(read_content("header.html"))
+        gr.HTML(read_content('header.html'))
     with gr.Row():
         with gr.Column():
-            input_image = gr.Image(type="numpy")
-            prompt = gr.Textbox(label="Prompt")
-            negative_prompt = gr.Textbox(label="Negative Prompt")
+            input_image = gr.Image(type='numpy')
+            prompt = gr.Textbox(label='Prompt')
+            negative_prompt = gr.Textbox(label='Negative Prompt')
             with gr.Row():
-                generate_from_mask = gr.Checkbox(label="Input image is already a control mask", value=False)
-                run_button = gr.Button(value="Run")
-            with gr.Accordion("Advanced options", open=False):
+                generate_from_mask = gr.Checkbox(label='Input image is already a control mask', value=False)
+                run_button = gr.Button(value='Run')
+            with gr.Accordion('Advanced options', open=False):
                 target_image_size = gr.Slider(
-                    label="Image target size:",
+                    label='Image target size:',
                     minimum=512,
                     maximum=2048,
                     value=768,
                     step=64,
                 )
                 max_image_size = gr.Slider(
-                    label="Image max size:",
+                    label='Image max size:',
                     minimum=512,
                     maximum=2048,
                     value=1024,
                     step=64,
                 )
-                num_inference_steps = gr.Slider(label="Number of steps", minimum=1, maximum=100, value=20, step=1)
-                guidance_scale = gr.Slider(label="Guidance scale", minimum=0.1, maximum=30.0, value=9.0, step=0.1)
-                conditioning_scale = gr.Slider(label="Conditioning scale", minimum=0.0, maximum=5.0, value=1.0, step=0.1)
-                seed = gr.Slider(label="Seed", minimum=0, maximum=config.max_seed, step=1, value=0)
+                num_inference_steps = gr.Slider(label='Number of steps', minimum=1, maximum=100, value=20, step=1)
+                guidance_scale = gr.Slider(label='Guidance scale', minimum=0.1, maximum=30.0, value=9.0, step=0.1)
+                conditioning_scale = gr.Slider(label='Conditioning scale', minimum=0.0, maximum=5.0, value=1.0, step=0.1)
+                seed = gr.Slider(label='Seed', minimum=0, maximum=config.max_seed, step=1, value=0)

-            gr.Examples(examples=examples, inputs=[input_image, prompt, negative_prompt], label="Examples - Input Images", examples_per_page=12)
+            gr.Examples(examples=examples, inputs=[input_image, prompt, negative_prompt], label='Examples - Input Images', examples_per_page=12)

             gr.HTML(
                 """
@@ -105,8 +103,8 @@ with block:
                 """)

         with gr.Column():
-            generated_output = gr.Image(label="Generated", type="numpy", elem_id="generated")
-            mask_output = gr.Image(label="Mask", type="numpy", elem_id="mask")
+            generated_output = gr.Image(label='Generated', type='numpy', elem_id='generated')
+            mask_output = gr.Image(label='Mask', type='numpy', elem_id='mask')

     ips = [input_image, prompt, negative_prompt, generate_from_mask, num_inference_steps, guidance_scale, conditioning_scale, target_image_size, max_image_size, seed]
     run_button.click(fn=process, inputs=ips, outputs=[generated_output, mask_output])
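Note: run_button.click passes ips to process positionally, so the list order must match the parameter order of process exactly. A minimal sketch of the signature this wiring implies; the parameter names come from the ips list in the diff, while the annotations and range comments are inferred from the widget definitions and are assumptions:

import numpy as np

# Sketch only: the real process() sits above this hunk and is not shown.
def process(
    input_image: np.ndarray,     # gr.Image(type='numpy')
    prompt: str,
    negative_prompt: str,
    generate_from_mask: bool,    # True if the input is already a control mask
    num_inference_steps: int,    # slider: 1..100, default 20
    guidance_scale: float,       # slider: 0.1..30.0, default 9.0
    conditioning_scale: float,   # slider: 0.0..5.0, default 1.0
    target_image_size: int,      # slider: 512..2048, step 64, default 768
    max_image_size: int,         # slider: 512..2048, step 64, default 1024
    seed: int,                   # slider: 0..config.max_seed
) -> list:
    # Returns [generated image, control mask], per the hunk at line 41.
    ...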
src/inference_no_condition.py CHANGED
@@ -6,13 +6,13 @@ import matplotlib.pyplot as plt


 pipe = DiffusionPipeline.from_pretrained(
-    "stabilityai/stable-diffusion-xl-base-1.0",
+    'stabilityai/stable-diffusion-xl-base-1.0',
     torch_dtype=torch.float16,
     use_safetensors=True,
-    variant="fp16",
+    variant='fp16',
 )

-pipe.to("cuda")
+pipe.to('cuda')

 with open('../examples/prompts.json', 'r') as f:
     prompts_list = list(json.load(f).values())
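The hunk above only covers loading. For context, here is a self-contained sketch of how the full no-condition run might look: the loading code and the prompts.json path are taken from the diff, while the generation loop is an assumption, and the JSON values may be (prompt, negative_prompt) pairs rather than plain strings (the examples list in app.py suggests so).

import json

import torch
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained(
    'stabilityai/stable-diffusion-xl-base-1.0',
    torch_dtype=torch.float16,   # half precision to cut VRAM use
    use_safetensors=True,
    variant='fp16',              # fetch the fp16 weight shards
)
pipe.to('cuda')

with open('../examples/prompts.json', 'r') as f:
    prompts_list = list(json.load(f).values())

# Assumed loop; adjust the unpacking if each entry is a pair.
for i, prompt in enumerate(prompts_list):
    image = pipe(prompt=prompt).images[0]
    image.save(f'no_condition_{i}.png')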
src/pipeline.py CHANGED
@@ -9,11 +9,9 @@ import torch

 from src.preprocess import HWC3
 from src.unet.predictor import generate_mask, load_seg_model
-
 from config import PipelineConfig


-
 @dataclass
 class PipelineOutput:
     control_mask: np.ndarray
@@ -49,7 +47,7 @@ class FashionPipeline:
         max_image_size: int,
         seed: int,
     ) -> PipelineOutput:
-
+        """Runs image generation pipeline."""
         # check image format
         control_image = HWC3(control_image)

@@ -99,7 +97,10 @@ class FashionPipeline:
         max_image_size: int = 768,
         divisible: int = 64,
     ) -> Image:
-
+        """Resizes the image so that width and height are
+        divided by 'divisible' while maintaining aspect ratio.
+        Restrict image size with target_image_size and max_image_size.
+        """
         assert target_image_size % divisible == 0
         assert max_image_size % divisible == 0
         assert max_image_size >= target_image_size
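The new docstring pins down the contract of resize_image, but its body sits outside this hunk. Below is a minimal sketch consistent with the docstring and the three asserts, assuming the shorter side is scaled toward target_image_size and both sides are then snapped to multiples of divisible; the committed implementation may differ.

from PIL import Image

def resize_image_sketch(
    image: Image.Image,
    target_image_size: int = 768,
    max_image_size: int = 1024,
    divisible: int = 64,
) -> Image.Image:
    assert target_image_size % divisible == 0
    assert max_image_size % divisible == 0
    assert max_image_size >= target_image_size

    width, height = image.size
    # Assumption: scale so the shorter side lands near target_image_size.
    scale = target_image_size / min(width, height)
    # Snap each side to a multiple of `divisible`, capped at max_image_size.
    new_w = min(max(round(width * scale / divisible), 1) * divisible, max_image_size)
    new_h = min(max(round(height * scale / divisible), 1) * divisible, max_image_size)
    return image.resize((new_w, new_h), Image.LANCZOS)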
 
@@ -130,15 +131,13 @@
         self.controlnet = ControlNetModel.from_pretrained(
             self.config.controlnet_path,
             torch_dtype=torch.float16,
-            # device_map="auto",
-        )
+        ).to(self.device)

         self.pipeline = StableDiffusionXLControlNetPipeline.from_pretrained(
             self.config.base_model_path,
             controlnet=self.controlnet,
             torch_dtype=torch.float16,
-            # device_map="auto",
-        )
+        ).to(self.device)

         self.pipeline.scheduler = UniPCMultistepScheduler.from_config(self.pipeline.scheduler.config)
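This last hunk speaks to the README task about device handling: instead of the commented-out device_map="auto" (which would let accelerate decide placement), both models are now pinned to one device with explicit .to(self.device) calls. A standalone sketch of the resulting loading pattern; the two paths are placeholders for config.controlnet_path and config.base_model_path.

import torch
from diffusers import (
    ControlNetModel,
    StableDiffusionXLControlNetPipeline,
    UniPCMultistepScheduler,
)

device = torch.device('cuda')  # stands in for self.device

controlnet = ControlNetModel.from_pretrained(
    'path/to/controlnet',          # placeholder for config.controlnet_path
    torch_dtype=torch.float16,
).to(device)

pipeline = StableDiffusionXLControlNetPipeline.from_pretrained(
    'path/to/sdxl-base',           # placeholder for config.base_model_path
    controlnet=controlnet,
    torch_dtype=torch.float16,
).to(device)

# Same scheduler swap as the hunk's final context line.
pipeline.scheduler = UniPCMultistepScheduler.from_config(pipeline.scheduler.config)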