import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms
from datasets import load_dataset
from huggingface_hub import HfApi  # only the Hub API client is used below
import os
import pandas as pd
import gradio as gr
from PIL import Image
import numpy as np
from small_256_model import UNet as small_UNet
from big_1024_model import UNet as big_UNet
from CLIP import load as load_clip,load_vae,encode_prompt
from rich import print as rp
from diffusers import AutoencoderKL
#url = "https://huggingface.co/stabilityai/sd-vae-ft-mse-original/blob/main/vae-ft-mse-840000-ema-pruned.safetensors" # can also be a local file
#model = AutoencoderKL.from_single_file(url)
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
big = device.type == 'cuda'  # use the 1024px model only when a GPU is available
# Parameters
IMG_SIZE = 1024 if big else 256
BATCH_SIZE = 1  # both model sizes currently train with batch size 1
EPOCHS = 12
LR = 0.0002
dataset_id = "K00B404/pix2pix_flux_set"
model_repo_id = "K00B404/pix2pix_flux"
# Global model variable
global_model = None
# CLIP and VAE
clip_model, clip_tokenizer = load_clip()
vae = load_vae()
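# A sketch of what load_vae() might wrap (hypothetical; the real implementation
# lives in CLIP.py), assuming the diffusers single-file loader referenced in the
# commented URL above:
#
#   def load_vae():
#       vae = AutoencoderKL.from_single_file(
#           "https://huggingface.co/stabilityai/sd-vae-ft-mse-original/"
#           "blob/main/vae-ft-mse-840000-ema-pruned.safetensors")
#       return vae.to(device).eval()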
def load_model():
    """Load the model weights at startup, falling back to a fresh model."""
global global_model
weights_name = 'big_model_weights.pth' if big else 'small_model_weights.pth'
try:
checkpoint = torch.load(weights_name, map_location=device)
model = big_UNet() if checkpoint['model_config']['big'] else small_UNet()
model.load_state_dict(checkpoint['model_state_dict'])
model.to(device)
model.eval()
global_model = model
rp("Model loaded successfully!")
return model
    except Exception as e:
        rp(f"Error loading model: {e}")
        # No usable checkpoint: fall back to a freshly initialized model
        model = big_UNet().to(device) if big else small_UNet().to(device)
global_model = model
return model
class Pix2PixDataset(torch.utils.data.Dataset):
    def __init__(self, combined_data, transform, clip_tokenizer, clip_model):
        self.data = combined_data
        self.transform = transform
        self.clip_tokenizer = clip_tokenizer
        self.clip_model = clip_model
        self.original_folder = 'images_dataset/original/'
        self.target_folder = 'images_dataset/target/'
def __len__(self):
return len(self.data)
def __getitem__(self, idx):
original_img_filename = os.path.basename(self.data.iloc[idx]['image_path'])
original_img_path = os.path.join(self.original_folder, original_img_filename)
target_img_path = os.path.join(self.target_folder, original_img_filename)
original_img = Image.open(original_img_path).convert('RGB')
target_img = Image.open(target_img_path).convert('RGB')
# Transform images
original = self.transform(original_img)
target = self.transform(target_img)
# Get prompts from the DataFrame
original_prompt = self.data.iloc[idx]['original_prompt']
enhanced_prompt = self.data.iloc[idx]['enhanced_prompt']
        # Encode images to VAE latents (encode expects a batch dimension)
        with torch.no_grad():
            original_image_latents = vae.encode(original.unsqueeze(0).to(device)).latent_dist.sample().squeeze(0)
            target_image_latents = vae.encode(target.unsqueeze(0).to(device)).latent_dist.sample().squeeze(0)
        # Encode the enhanced prompt with CLIP
        prompt_latents = encode_prompt(enhanced_prompt, self.clip_model, self.clip_tokenizer)
        return original_image_latents, target_image_latents, prompt_latents
        # Legacy variant (expected by train_model_old): return images plus CLIP tokens
        # original_tokens = self.clip_tokenizer(original_prompt, return_tensors="pt", padding=True, truncation=True, max_length=77)
        # enhanced_tokens = self.clip_tokenizer(enhanced_prompt, return_tensors="pt", padding=True, truncation=True, max_length=77)
        # return original, target, original_tokens, enhanced_tokens
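# Quick smoke test for the dataset (a sketch; assumes combined_data.csv and the
# images_dataset/original and images_dataset/target folders exist locally, as
# train_model below expects):
#
#   df = pd.read_csv('combined_data.csv')
#   tfm = transforms.Compose([transforms.Resize((IMG_SIZE, IMG_SIZE)), transforms.ToTensor()])
#   ds = Pix2PixDataset(df, tfm, clip_tokenizer, clip_model)
#   orig_lat, tgt_lat, prompt_lat = ds[0]  # VAE latents plus CLIP prompt embedding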
class UNetWrapper:
    def __init__(self, unet_model, repo_id, epoch, loss, optimizer=None, scheduler=None):
self.loss = loss
self.epoch = epoch
self.model = unet_model
self.optimizer = optimizer
self.scheduler = scheduler
self.repo_id = repo_id
self.token = os.getenv('NEW_TOKEN') # Ensure the token is set in the environment
self.api = HfApi(token=self.token)
def save_checkpoint(self, save_path):
"""Save checkpoint with model, optimizer, and scheduler states."""
self.save_dict = {
'model_state_dict': self.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict() if self.optimizer else None,
'scheduler_state_dict': self.scheduler.state_dict() if self.scheduler else None,
'model_config': {
'big': isinstance(self.model, big_UNet),
'img_size': 1024 if isinstance(self.model, big_UNet) else 256
},
'epoch': self.epoch,
'loss': self.loss
}
torch.save(self.save_dict, save_path)
print(f"Checkpoint saved at epoch {self.epoch}, loss: {self.loss}")
def load_checkpoint(self, checkpoint_path):
"""Load model, optimizer, and scheduler states from the checkpoint."""
checkpoint = torch.load(checkpoint_path, map_location=device)
self.model.load_state_dict(checkpoint['model_state_dict'])
        if self.optimizer and checkpoint.get('optimizer_state_dict'):
            self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        if self.scheduler and checkpoint.get('scheduler_state_dict'):
            self.scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
self.epoch = checkpoint['epoch']
self.loss = checkpoint['loss']
print(f"Checkpoint loaded: epoch {self.epoch}, loss: {self.loss}")
    def push_to_hub(self, pth_name):
        """Push model checkpoint and metadata to the Hugging Face Hub."""
        if not hasattr(self, 'save_dict'):
            # Ensure a checkpoint (and its save_dict metadata) exists before uploading
            self.save_checkpoint(pth_name)
        try:
self.api.upload_file(
path_or_fileobj=pth_name,
path_in_repo=pth_name,
repo_id=self.repo_id,
token=self.token,
repo_type="model"
)
print(f"Model checkpoint successfully uploaded to {self.repo_id}")
except Exception as e:
print(f"Error uploading model: {e}")
# Create and upload model card
model_card = f"""---
tags:
- unet
- pix2pix
- pytorch
library_name: pytorch
license: wtfpl
datasets:
- K00B404/pix2pix_flux_set
language:
- en
pipeline_tag: image-to-image
---
# Pix2Pix UNet Model
## Model Description
Custom UNet model for Pix2Pix image translation.
- **Image Size:** {self.save_dict['model_config']['img_size']}
- **Model Type:** {"big" if big else "small"}_UNet ({self.save_dict['model_config']['img_size']})
## Usage
```python
import torch
from small_256_model import UNet as small_UNet
from big_1024_model import UNet as big_UNet
big = True
# Load the model
name='big_model_weights.pth' if big else 'small_model_weights.pth'
checkpoint = torch.load(name)
model = big_UNet() if checkpoint['model_config']['big'] else small_UNet()
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()
```
## Model Architecture
{str(self.model)} """
rp(model_card)
        try:
            # Save the full model card (with YAML metadata) as the repo README
            with open("README.md", "w") as f:
                f.write(model_card)
self.api.upload_file(
path_or_fileobj="README.md",
path_in_repo="README.md",
repo_id=self.repo_id,
token=self.token,
repo_type="model"
)
# Clean up local files
os.remove(pth_name)
os.remove("README.md")
print(f"Model successfully uploaded to {self.repo_id}")
except Exception as e:
print(f"Error uploading model: {e}")
def prepare_input(image, device='cpu'):
"""Prepare image for inference"""
transform = transforms.Compose([
transforms.Resize((IMG_SIZE, IMG_SIZE)),
transforms.ToTensor(),
])
if isinstance(image, np.ndarray):
image = Image.fromarray(image)
input_tensor = transform(image).unsqueeze(0).to(device)
return input_tensor
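# Usage example (a sketch; 'input.jpg' is a placeholder path):
#
#   x = prepare_input(Image.open('input.jpg'), device)  # -> (1, 3, IMG_SIZE, IMG_SIZE), values in [0, 1]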
def run_inference(image):
    """Run inference on a single image (pixel-space input, as in train_model_old)."""
global global_model
if global_model is None:
return "Error: Model not loaded"
global_model.eval()
input_tensor = prepare_input(image, device)
with torch.no_grad():
output = global_model(input_tensor)
    # Convert the output tensor to a uint8 image array (min-max normalized)
    output = output.cpu().squeeze(0).permute(1, 2, 0).numpy()
    output = ((output - output.min()) / (output.max() - output.min() + 1e-8) * 255).astype(np.uint8)
    return output
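# Companion helper (a sketch): a model trained with train_model below operates on
# VAE latents, so its raw output must be decoded back to pixel space before
# display. AutoencoderKL.decode returns an object with a .sample tensor; the
# usual Stable Diffusion recipe also rescales by vae.config.scaling_factor,
# omitted here to mirror the unscaled encode in Pix2PixDataset.
def latents_to_image(latents):
    """Decode a (1, C, H, W) latent batch into a PIL image."""
    with torch.no_grad():
        decoded = vae.decode(latents).sample
    decoded = decoded.cpu().squeeze(0).permute(1, 2, 0).numpy()
    decoded = ((decoded - decoded.min()) / (decoded.max() - decoded.min() + 1e-8) * 255).astype(np.uint8)
    return Image.fromarray(decoded)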
def to_hub(model, epoch, loss):
    """Save a checkpoint for `model` and push it to the Hub."""
    wrapper = UNetWrapper(model, model_repo_id, epoch, loss)
    weights_name = 'big_model_weights.pth' if big else 'small_model_weights.pth'
    wrapper.save_checkpoint(weights_name)
    wrapper.push_to_hub(weights_name)
def train_model(epochs, save_interval=1):
"""Training function with checkpoint saving and model uploading."""
global global_model
# Load combined data CSV
data_path = 'combined_data.csv'
combined_data = pd.read_csv(data_path)
# Define the transformation
transform = transforms.Compose([
transforms.Resize((IMG_SIZE, IMG_SIZE)),
transforms.ToTensor(),
])
# Initialize dataset and dataloader
dataset = Pix2PixDataset(combined_data, transform, clip_tokenizer, clip_model)
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)
    model = global_model if global_model is not None else load_model()
criterion = nn.L1Loss() # You may change this to suit your loss calculation needs
optimizer = optim.Adam(model.parameters(), lr=LR)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1) # Example scheduler
wrapper = UNetWrapper(model, model_repo_id, epoch=0, loss=0.0, optimizer=optimizer, scheduler=scheduler)
output_text = []
for epoch in range(epochs):
model.train()
running_loss = 0.0
for i, (latent_original, latent_target, latent_prompt) in enumerate(dataloader):
# Move data to device
latent_original, latent_target, latent_prompt = latent_original.to(device), latent_target.to(device), latent_prompt.to(device)
optimizer.zero_grad()
# Forward pass with the latents
output = model(latent_target, latent_prompt) # Assuming your model can take both target and prompt latents
# Calculate loss using the original latents
img_loss = criterion(output, latent_original)
total_loss = img_loss
total_loss.backward()
optimizer.step()
running_loss += total_loss.item()
if i % 10 == 0:
status = f"Epoch [{epoch}/{epochs}], Step [{i}/{len(dataloader)}], Loss: {total_loss.item():.8f}"
print(status)
output_text.append(status)
# Update the epoch and loss for checkpoint
wrapper.epoch = epoch + 1
wrapper.loss = running_loss / len(dataloader)
# Save checkpoint at specified intervals
if (epoch + 1) % save_interval == 0:
checkpoint_path = f'big_checkpoint_epoch_{epoch+1}.pth' if big else f'small_checkpoint_epoch_{epoch+1}.pth'
wrapper.save_checkpoint(checkpoint_path)
wrapper.push_to_hub(checkpoint_path)
scheduler.step() # Update learning rate scheduler
global_model = model # Update global model after training
return model, "\n".join(output_text)
def train_model_old(epochs):
    """Legacy pixel-space training loop.

    Expects the token-returning __getitem__ variant that is commented out in
    Pix2PixDataset; kept for reference.
    """
global global_model
# Load combined data CSV
data_path = 'combined_data.csv' # Adjust this path
combined_data = pd.read_csv(data_path)
# Define the transformation
transform = transforms.Compose([
transforms.Resize((IMG_SIZE, IMG_SIZE)),
transforms.ToTensor(),
])
# Initialize the dataset and dataloader
    dataset = Pix2PixDataset(combined_data, transform, clip_tokenizer, clip_model)
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)
model = global_model
criterion = nn.L1Loss() # L1 loss for image reconstruction
optimizer = optim.Adam(model.parameters(), lr=LR)
output_text = []
for epoch in range(epochs):
model.train()
for i, (original, target, original_prompt_tokens, enhanced_prompt_tokens) in enumerate(dataloader):
# Move images and prompt embeddings to the appropriate device (CPU or GPU)
original, target = original.to(device), target.to(device)
original_prompt_tokens = original_prompt_tokens.input_ids.to(device).float() # Convert to float
enhanced_prompt_tokens = enhanced_prompt_tokens.input_ids.to(device).float() # Convert to float
optimizer.zero_grad()
# Forward pass through the model
output = model(target)
# Compute image reconstruction loss
img_loss = criterion(output, original)
rp(f"Image {i} Loss:{img_loss}")
# Combine losses
total_loss = img_loss # Add any other losses if necessary
total_loss.backward()
# Optimizer step
optimizer.step()
if i % 10 == 0:
status = f"Epoch [{epoch}/{epochs}], Step [{i}/{len(dataloader)}], Loss: {total_loss.item():.8f}"
rp(status)
output_text.append(status)
# Push model to Hugging Face Hub at the end of each epoch
        to_hub(model, epoch, total_loss.item())
global_model = model # Update the global model after training
return model, "\n".join(output_text)
def gradio_train(epochs):
    """Gradio training interface function."""
    model, training_log = train_model(int(epochs))
    return f"{training_log}\n\nModel trained for {epochs} epochs and pushed to {model_repo_id}"
def gradio_inference(input_image):
    """Gradio inference interface function: returns the processed output image."""
    return run_inference(input_image)
# Create Gradio interface with tabs
with gr.Blocks() as app:
gr.Markdown("# Pix2Pix Model Training and Inference")
with gr.Tab("Train"):
epochs_input = gr.Number(value=EPOCHS, label="Number of epochs")
train_button = gr.Button("Train")
training_output = gr.Textbox(label="Training Log", interactive=False)
train_button.click(gradio_train, inputs=[epochs_input], outputs=[training_output])
with gr.Tab("Inference"):
image_input = gr.Image(type='numpy')
        prompt_input = gr.Textbox(label="Prompt")  # not yet wired into inference
inference_button = gr.Button("Generate")
inference_output = gr.Image(type='numpy', label="Generated Image")
inference_button.click(gradio_inference, inputs=[image_input], outputs=[inference_output])
load_model()
app.launch()