fffiloni committed
Commit ea50b7d
1 Parent(s): 7eee4f5

better GPU memory management

Files changed (1): main.py (+100 -24)
main.py CHANGED
@@ -15,35 +15,83 @@ from rewards import get_reward_losses
 from training import LatentNoiseTrainer, get_optimizer
 
 
-import torch
-import gc
+def find_and_move_object_to_cpu():
+    for obj in gc.get_objects():
+        try:
+            # Check if the object is a PyTorch model
+            if isinstance(obj, torch.nn.Module):
+                # Check if any parameter of the model is on CUDA
+                if any(param.is_cuda for param in obj.parameters()):
+                    print(f"Found PyTorch model on CUDA: {type(obj).__name__}")
+                    # Move the model to CPU
+                    obj.to('cpu')
+                    print(f"Moved {type(obj).__name__} to CPU.")
+
+                # Optionally check if buffers are on CUDA
+                if any(buf.is_cuda for buf in obj.buffers()):
+                    print(f"Found buffer on CUDA in {type(obj).__name__}")
+                    obj.to('cpu')
+                    print(f"Moved buffers of {type(obj).__name__} to CPU.")
+
+        except Exception:
+            # Skip objects that are not torch modules or cannot be inspected
+            pass
+
 
 def clear_gpu():
     """Clear GPU memory by removing tensors, freeing cache, and moving data to CPU."""
     # List memory usage before clearing
     print(f"Memory allocated before clearing: {torch.cuda.memory_allocated() / (1024 ** 2)} MB")
     print(f"Memory reserved before clearing: {torch.cuda.memory_reserved() / (1024 ** 2)} MB")
-
-    # Force the garbage collector to free unreferenced objects
-    gc.collect()
 
     # Move any bound tensors back to CPU if needed
     if torch.cuda.is_available():
-        torch.cuda.empty_cache()  # Free up the cached memory
-        torch.cuda.ipc_collect()  # Clear any cross-process memory
+        torch.cuda.empty_cache()
+        torch.cuda.synchronize()  # Ensure that all operations are completed
+        print("GPU memory cleared.")
 
     print(f"Memory allocated after clearing: {torch.cuda.memory_allocated() / (1024 ** 2)} MB")
     print(f"Memory reserved after clearing: {torch.cuda.memory_reserved() / (1024 ** 2)} MB")
 
 def unload_previous_model_if_needed(loaded_model_setup):
+    # Check whether GPU memory is still in use even when loaded_model_setup is None
+    if loaded_model_setup is None:
+        if torch.cuda.is_available() and torch.cuda.memory_allocated() > 0:
+            print("Unknown model or tensors are still loaded on the GPU. Clearing GPU memory.")
+            # Find stray CUDA objects and move them to CPU
+            find_and_move_object_to_cpu()
+
+        return
+
     """Unload the current model from the GPU and free resources if a new model is being loaded."""
-    if loaded_model_setup is not None:
-        print("Unloading previous model from GPU to free memory.")
-        previous_model = loaded_model_setup[7]  # Assuming pipe is at position [7] in the setup
-        if hasattr(previous_model, 'to') and loaded_model_setup[0].model != "flux":
+
+    print("Unloading previous model from GPU to free memory.")
+
+    """
+    previous_model = loaded_model_setup[7]  # Assuming pipe is at position [7] in the setup
+    # If the model is 'hyper-sd', ensure its components are moved to CPU before deletion
+    if loaded_model_setup[0].model == "hyper-sd":
+        if previous_model.device == torch.device('cuda'):
+            if hasattr(previous_model, 'unet'):
+                print("Moving UNet back to CPU.")
+                previous_model.unet.to('cpu')  # Move the UNet to CPU
+
+            print("Moving entire pipeline back to CPU.")
+            previous_model.to('cpu')  # Move the entire pipeline (pipe) to CPU
+    # For other models, use a generic 'to' method if available
+    elif hasattr(previous_model, 'to') and loaded_model_setup[0].model != "flux":
+        if previous_model.device == torch.device('cuda'):
+            print("Moving previous model back to CPU.")
             previous_model.to('cpu')  # Move model to CPU to free GPU memory
-        del previous_model  # Delete the reference to the model
-        clear_gpu()  # Clear all remaining GPU memory
+
+    # Delete the reference to the model to allow garbage collection
+    del previous_model
+    """
+    # Find stray CUDA objects and move them to CPU
+    find_and_move_object_to_cpu()
+
+    # Clear GPU memory
+    clear_gpu()  # Ensure that this function properly clears memory (e.g., torch.cuda.empty_cache())
 
 def setup(args, loaded_model_setup=None):
     seed_everything(args.seed)
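
The find_and_move_object_to_cpu helper added in this hunk sweeps every object the garbage collector tracks and evicts any torch.nn.Module still resident on CUDA. A minimal standalone sketch of that sweep, assuming nothing from main.py (the toy Net module and the driver at the bottom are illustrative only):

import gc
import torch
import torch.nn as nn

class Net(nn.Module):
    """Toy module used only to demonstrate the sweep."""
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(8, 8)

def sweep_cuda_modules_to_cpu():
    # Walk all GC-tracked objects and move CUDA-resident modules to CPU,
    # mirroring the find_and_move_object_to_cpu logic in this commit.
    for obj in gc.get_objects():
        try:
            if isinstance(obj, nn.Module) and any(p.is_cuda for p in obj.parameters()):
                obj.to('cpu')
        except Exception:
            pass  # some GC-tracked objects cannot be inspected safely

if torch.cuda.is_available():
    net = Net().cuda()
    print(torch.cuda.memory_allocated())  # non-zero while Net holds CUDA weights
    sweep_cuda_modules_to_cpu()
    torch.cuda.empty_cache()               # return freed blocks to the driver
    print(torch.cuda.memory_allocated())   # drops once the weights are on CPU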
@@ -144,7 +192,6 @@ def setup(args, loaded_model_setup=None):
 
     # Final memory cleanup after model loading
     torch.cuda.empty_cache()
-    gc.collect()
 
     trainer = LatentNoiseTrainer(
         reward_losses=reward_losses,
@@ -188,7 +235,7 @@
 
     # Final memory cleanup
    torch.cuda.empty_cache()  # Free up cached memory
-    gc.collect()
+
 
 
 
@@ -200,23 +247,30 @@
 def execute_task(args, trainer, device, dtype, shape, enable_grad, settings, pipe, progress_callback=None):
 
     if args.task == "single":
+
+
+
         # Attempt to move the model to GPU if model is not Flux
         if args.model != "flux":
-            if args.model != "pixart":
+            if args.model == "hyper-sd":
                 if pipe.device != torch.device('cuda'):
-                    pipe.to(device, dtype)
-            else:
+                    # Transfer the UNet to the GPU
+                    pipe.unet = pipe.unet.to(device, dtype)
+                    # Transfer the whole pipe to the GPU, if required (optional)
+                    pipe = pipe.to(device, dtype)
+                    # Upcast the VAE to float32
+                    pipe.vae = pipe.vae.to(dtype=torch.float32)
+            elif args.model == "pixart":
                 if pipe.device != torch.device('cuda'):
                     pipe.to(device)
+            else:
+                if pipe.device != torch.device('cuda'):
+                    pipe.to(device, dtype)
         else:
-            print(f"PIPE:{pipe}")
-
 
             if args.cpu_offloading:
                 pipe.enable_sequential_cpu_offload()
 
-        #if pipe.device != torch.device('cuda'):
-        #    pipe.to(device, dtype)
 
         if args.enable_multi_apply:
 
@@ -232,8 +286,8 @@ def execute_task(args, trainer, device, dtype, shape, enable_grad, settings, pip
             multi_apply_fn = None
 
         torch.cuda.empty_cache()  # Free up cached memory
-        gc.collect()
-
+
+        print(f"PIPE:{pipe}")
 
         init_latents = torch.randn(shape, device=device, dtype=dtype)
         latents = torch.nn.Parameter(init_latents, requires_grad=enable_grad)
@@ -246,6 +300,28 @@ def execute_task(args, trainer, device, dtype, shape, enable_grad, settings, pip
         best_image.save(f"{save_dir}/best_image.png")
         #init_image.save(f"{save_dir}/init_image.png")
 
+        # Move the pipe back to CPU
+        if args.model != "flux":
+            if args.model == "hyper-sd":
+                if pipe.device == torch.device('cuda'):
+                    print("Moving the entire pipe back to CPU.")
+                    # Move the UNet back to CPU
+                    pipe.unet = pipe.unet.to("cpu")
+                    pipe.to('cpu')  # Move all components of the pipe back to CPU
+                    # Delete the pipe to free resources
+                    del pipe
+                    print("Pipe deleted to free resources.")
+
+            else:
+                if pipe.device == torch.device('cuda'):
+                    print("Moving the entire pipe back to CPU.")
+                    pipe.to("cpu")
+                    # Delete the pipe to free resources
+                    del pipe
+                    print("Pipe deleted to free resources.")
+
+            clear_gpu()
+
     elif args.task == "example-prompts":
         fo = open("assets/example_prompts.txt", "r")
         prompts = fo.readlines()
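
clear_gpu() prints both the allocated and the reserved counters; the distinction matters because torch.cuda.empty_cache() only returns reserved-but-unallocated blocks to the driver, while allocated memory drops only when the tensors themselves are freed or moved off the device. A small standalone illustration of the two counters (not from main.py):

import torch

if torch.cuda.is_available():
    x = torch.randn(1024, 1024, device="cuda")  # ~4 MB of fp32
    print(torch.cuda.memory_allocated() / 2**20, "MB allocated")
    print(torch.cuda.memory_reserved() / 2**20, "MB reserved")
    del x                                       # allocation returns to the cache
    print(torch.cuda.memory_allocated() / 2**20, "MB allocated after del")
    torch.cuda.empty_cache()                    # cached blocks go back to the driver
    print(torch.cuda.memory_reserved() / 2**20, "MB reserved after empty_cache()")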