# src/inference/eval_text2img_seed_x.py
# Text-to-image inference for SEED-X: the instruction-tuned agent generates
# image tokens from a caption, and an SDXL-based adapter decodes them into an image.
import os

import hydra
import pyrootutils
import torch
from diffusers import AutoencoderKL, UNet2DConditionModel, EulerDiscreteScheduler
from omegaconf import OmegaConf
from PIL import Image

# Find the project root (marked by '.project-root') and add it to PYTHONPATH
# so that in-repo modules referenced by the configs below can be imported.
pyrootutils.setup_root(__file__, indicator='.project-root', pythonpath=True)
BOI_TOKEN = '<img>'         # begin-of-image marker
EOI_TOKEN = '</img>'        # end-of-image marker
IMG_TOKEN = '<img_{:05d}>'  # template for the discrete image tokens
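
# For orientation, an image slot in the token stream is presumably laid out as
#   <img><img_00000><img_00001> ... <img_00063></img>
# i.e. BOI_TOKEN, then num_img_out_tokens IMG_TOKENs, then EOI_TOKEN (inferred
# from the templates above; the exact layout lives in the tokenizer/agent configs).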

# Two-GPU split: the LLM/agent live on the first device, while the visual
# encoder, VAE, UNet, adapter, and discrete model live on the second.
device = 'cuda:0'
device_2 = 'cuda:1'
dtype = torch.float16
dtype_str = 'fp16'

num_img_in_tokens = 64   # image tokens per input image
num_img_out_tokens = 64  # image tokens the agent emits per generated image

instruction_prompt = '[INST] Generate an image: {caption} [/INST]\n'
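
# Paths to the component configs and the SDXL checkpoint, instantiated below.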
tokenizer_cfg_path = 'configs/tokenizer/clm_llama_tokenizer_224loc_anyres.yaml'
image_transform_cfg_path = 'configs/processer/qwen_448_transform.yaml'
visual_encoder_cfg_path = 'configs/visual_encoder/qwen_vitg_448.yaml'
llm_cfg_path = 'configs/clm_models/llm_seed_x_i.yaml'
agent_cfg_path = 'configs/clm_models/agent_seed_x_i.yaml'
adapter_cfg_path = 'configs/sdxl_adapter/sdxl_qwen_vit_resampler_l4_q64_pretrain_no_normalize.yaml'
discrete_model_cfg_path = 'configs/discrete_model/discrete_identity.yaml'
diffusion_model_path = 'pretrained/stable-diffusion-xl-base-1.0'
save_dir = 'vis'
os.makedirs(save_dir, exist_ok=True)

tokenizer_cfg = OmegaConf.load(tokenizer_cfg_path)
tokenizer = hydra.utils.instantiate(tokenizer_cfg)

image_transform_cfg = OmegaConf.load(image_transform_cfg_path)
image_transform = hydra.utils.instantiate(image_transform_cfg)

visual_encoder_cfg = OmegaConf.load(visual_encoder_cfg_path)
visual_encoder = hydra.utils.instantiate(visual_encoder_cfg)
visual_encoder.eval().to(device_2, dtype=dtype)
print('Init visual encoder done')

llm_cfg = OmegaConf.load(llm_cfg_path)
llm = hydra.utils.instantiate(llm_cfg, torch_dtype=dtype)
print('Init llm done')

# The agent wraps the LLM with the multimodal input/output heads.
agent_model_cfg = OmegaConf.load(agent_cfg_path)
agent_model = hydra.utils.instantiate(agent_model_cfg, llm=llm)
agent_model.eval().to(device, dtype=dtype)
print('Init agent model done')

noise_scheduler = EulerDiscreteScheduler.from_pretrained(diffusion_model_path, subfolder="scheduler")

vae = AutoencoderKL.from_pretrained(diffusion_model_path, subfolder="vae").to(device_2, dtype=dtype)
print('Init vae done')

unet = UNet2DConditionModel.from_pretrained(diffusion_model_path, subfolder="unet").to(device_2, dtype=dtype)
print('Init unet done')

adapter_cfg = OmegaConf.load(adapter_cfg_path)
adapter = hydra.utils.instantiate(adapter_cfg, unet=unet).to(device_2, dtype=dtype).eval()

discrete_model_cfg = OmegaConf.load(discrete_model_cfg_path)
discrete_model = hydra.utils.instantiate(discrete_model_cfg).to(device_2).eval()
print('Init adapter done')

# Assemble the adapter's SDXL generation pipeline on the second GPU.
adapter.init_pipe(vae=vae,
                  scheduler=noise_scheduler,
                  visual_encoder=visual_encoder,
                  image_transform=image_transform,
                  discrete_model=discrete_model,
                  dtype=dtype,
                  device=device_2)
print('Init adapter pipe done')

caption = 'A cybernetic soldier, enhanced with advanced weapons systems and tactical analysis software, on a mission behind enemy lines.'
prompt = instruction_prompt.format_map({'caption': caption})

# Tokenize without special tokens so the BOS token can be prepended explicitly.
prompt_ids = tokenizer.encode(prompt, add_special_tokens=False)
input_ids = torch.tensor([tokenizer.bos_token_id] + prompt_ids).to(device, dtype=torch.long).unsqueeze(0)
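
# Optional sanity check (not in the original script): a single BOS token was
# prepended, so the batch should have shape (1, len(prompt_ids) + 1).
assert input_ids.shape == (1, len(prompt_ids) + 1)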

# The agent returns generated text plus, when it emits an image slot,
# the features ('img_gen_feat') that condition the SDXL adapter.
output = agent_model.generate(tokenizer=tokenizer, input_ids=input_ids, num_img_gen_tokens=num_img_out_tokens)
print(output['has_img_output'])
print(output['text'])

if output['has_img_output']:
    # Decode the generated image features into pixels with the SDXL adapter.
    images = adapter.generate(image_embeds=output['img_gen_feat'].to(device_2), num_inference_steps=50)
    save_path = os.path.join(save_dir, caption.replace('.', '') + '.png')
    images[0].save(save_path)

torch.cuda.empty_cache()
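
# --- Optional extension: batch over multiple captions -----------------------
# A minimal sketch, not part of the original script; it reuses the models
# initialized above and the generate() calls exactly as invoked there.
# The captions below are placeholders.
captions = [
    'A watercolor painting of a lighthouse at dawn.',
    'A robot tending a rooftop garden at sunset.',
]
for cap in captions:
    prompt = instruction_prompt.format_map({'caption': cap})
    prompt_ids = tokenizer.encode(prompt, add_special_tokens=False)
    input_ids = torch.tensor([tokenizer.bos_token_id] + prompt_ids).to(device, dtype=torch.long).unsqueeze(0)
    output = agent_model.generate(tokenizer=tokenizer, input_ids=input_ids, num_img_gen_tokens=num_img_out_tokens)
    if output['has_img_output']:
        images = adapter.generate(image_embeds=output['img_gen_feat'].to(device_2), num_inference_steps=50)
        images[0].save(os.path.join(save_dir, cap.replace('.', '') + '.png'))
    torch.cuda.empty_cache()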