AlekseyCalvin committed on
Commit
cd7a2f9
1 Parent(s): 86192c9

Update pipeline.py

Files changed (1)
  1. pipeline.py +3 -5
pipeline.py CHANGED
@@ -108,7 +108,7 @@ class FluxWithCFGPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleFile
         self,
         prompt: Union[str, List[str]] = None,
         num_images_per_prompt: int = 1,
-        max_sequence_length: int = 512,
+        max_sequence_length: int = 256,
         device: Optional[torch.device] = None,
         dtype: Optional[torch.dtype] = None,
     ):
@@ -154,7 +154,6 @@ class FluxWithCFGPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleFile
         self,
         prompt: Union[str, List[str]],
         num_images_per_prompt: int = 1,
-        max_sequence_length: int = 512,
         device: Optional[torch.device] = None,
     ):
         device = device or self._execution_device
@@ -180,7 +179,7 @@ class FluxWithCFGPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleFile
                 "The following part of your input was truncated because CLIP can only handle sequences up to"
                 f" {self.tokenizer_max_length} tokens: {removed_text}"
             )
-        prompt_embeds = self.text_encoder(text_input_ids.to(device), output_hidden_states=False)
+        prompt_embeds = self.text_encoder(text_input_ids.to(device), output_hidden_states=True)

         # Use pooled output of CLIPTextModel
         prompt_embeds = prompt_embeds.pooler_output
@@ -190,7 +189,7 @@ class FluxWithCFGPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleFile

         # duplicate text embeddings for each generation per prompt, using mps friendly method
         prompt_embeds = prompt_embeds.repeat(1, num_images_per_prompt)
-        prompt_embeds = prompt_embeds.view(batch_size * num_images_per_prompt, -1)
+        prompt_embeds = prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)

         return prompt_embeds

@@ -273,7 +272,6 @@ class FluxWithCFGPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleFile
             prompt=negative_prompt,
             device=device,
             num_images_per_prompt=num_images_per_prompt,
-            max_sequence_length=max_sequence_length,
         )

         t5_negative_prompt_embed = self._get_t5_prompt_embeds(
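
For context (not part of the commit): a minimal sketch of the two CLIP text-encoder outputs the hunks above touch, assuming a standard transformers CLIPTextModel checkpoint such as openai/clip-vit-large-patch14 (the model name is an assumption for illustration). The pooled output is a single vector per prompt, so reshaping it needs no sequence dimension, while the per-token hidden states carry the seq_len dimension that the updated view call refers to.

import torch
from transformers import CLIPTokenizer, CLIPTextModel

# Illustrative only; checkpoint name assumed, not taken from the repo above.
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
text_encoder = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14")

# Tokenize a single prompt to CLIP's fixed max length (77 tokens).
inputs = tokenizer(
    ["a photo of a cat"],
    padding="max_length",
    max_length=tokenizer.model_max_length,
    return_tensors="pt",
)

with torch.no_grad():
    out = text_encoder(inputs.input_ids, output_hidden_states=True)

# Pooled output: one embedding per prompt -> shape (batch, hidden_dim).
print(out.pooler_output.shape)      # torch.Size([1, 768])
# Per-token hidden states: shape (batch, seq_len, hidden_dim).
print(out.last_hidden_state.shape)  # torch.Size([1, 77, 768])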