Spaces:

xco2
/

small_diffusion

Runtime error

App Files Files Community

small_diffusion / app.py

xco2

改成cpu运行

fda67f2 12 months ago

raw

history blame

No virus

22.2 kB

	import random

	import gradio as gr
	import time, os
	import numpy as np
	import torch
	from tqdm import tqdm, trange
	from PIL import Image


	def random_clip(x, min=-1.5, max=1.5):
	if isinstance(x, np.ndarray):
	return np.clip(x, min, max)
	elif isinstance(x, torch.Tensor):
	return torch.clip(x, min, max)
	else:
	raise TypeError(f"type of x is {type(x)}")


	class Sampler:
	def __init__(self, device, normal_t):
	self.device = device
	self.total_step = 1000
	self.normal_t = normal_t

	self.afas_cumprod, self.betas = self.get_afa_bars("scaled_linear", # cosine,linear,scaled_linear
	self.total_step)
	self.afas_cumprod = torch.Tensor(self.afas_cumprod).to(self.device)
	self.betas = torch.Tensor(self.betas).to(self.device)

	def betas_for_alpha_bar(self, num_diffusion_timesteps, alpha_bar, max_beta=0.999):
	"""
	Create a beta schedule that discretizes the given alpha_t_bar function,
	which defines the cumulative product of (1-beta) over time from t = [0,1].

	:param num_diffusion_timesteps: the number of betas to produce.
	:param alpha_bar: a lambda that takes an argument t from 0 to 1 and
	produces the cumulative product of (1-beta) up to that
	part of the diffusion process.
	:param max_beta: the maximum beta to use; use values lower than 1 to
	prevent singularities.
	"""
	betas = []
	for i in range(num_diffusion_timesteps):
	t1 = i / num_diffusion_timesteps
	t2 = (i + 1) / num_diffusion_timesteps
	betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta))
	return np.array(betas)

	def get_named_beta_schedule(self, schedule_name, num_diffusion_timesteps):
	"""
	Get a pre-defined beta schedule for the given name.

	The beta schedule library consists of beta schedules which remain similar
	in the limit of num_diffusion_timesteps.
	Beta schedules may be added, but should not be removed or changed once
	they are committed to maintain backwards compatibility.
	"""
	if schedule_name == "linear":
	# Linear schedule from Ho et al, extended to work for any number of
	# diffusion steps.
	scale = 1000 / num_diffusion_timesteps
	beta_start = scale * 0.0001
	beta_end = scale * 0.02
	return np.linspace(
	beta_start, beta_end, num_diffusion_timesteps, dtype=np.float64
	)
	elif schedule_name == "scaled_linear":
	scale = 1000 / num_diffusion_timesteps
	beta_start = scale * 0.0001
	beta_end = scale * 0.02
	return np.linspace(
	beta_start 0.5, beta_end 0.5, num_diffusion_timesteps, dtype=np.float64) ** 2
	elif schedule_name == "cosine":
	return self.betas_for_alpha_bar(
	num_diffusion_timesteps,
	lambda t: np.cos((t + 0.008) / 1.008 * np.pi / 2) ** 2,
	)
	else:
	raise NotImplementedError(f"unknown beta schedule: {schedule_name}")

	def get_afa_bars(self, beta_schedule_name, total_step):
	"""
	生成afa bar的列表,列表长度为total_step
	:param beta_schedule_name: beta_schedule
	:return: afa_bars和betas
	"""

	# if linear:
	# # 线性
	# betas = np.linspace(1e-5, 0.1, self.total_step)
	#
	# else:
	# # sigmoid
	# betas = np.linspace(-6, 6, self.total_step)
	# betas = 1 / (1 + np.exp(betas)) * (afa_max - afa_min) + afa_min
	betas = self.get_named_beta_schedule(schedule_name=beta_schedule_name,
	num_diffusion_timesteps=total_step)

	afas = 1 - betas
	afas_cumprod = np.cumprod(afas)
	# afas_cumprod = np.concatenate((np.array([1]), afas_cumprod[:-1]), axis=0)
	return afas_cumprod, betas

	# 重全噪声开始
	@torch.no_grad()
	def sample_loop(self, model, vae_middle_c, batch_size, step, eta, shape=(32, 32)):
	pass

	def apple_noise(self, data, step):
	"""
	添加噪声,返回xt和噪声
	:param data: 数据,潜空间
	:param step: 选择的步数
	:return:
	"""
	data = data.to(self.device)

	noise = torch.randn(size=data.shape).to(self.device)
	afa_bar_t = self.afas_cumprod[step - 1]
	x_t = torch.sqrt(afa_bar_t) * data + torch.sqrt(1 - afa_bar_t) * noise
	return x_t

	# 图生图
	@torch.no_grad()
	def sample_loop_img2img(self, input_img, model, vae_middle_c, batch_size, step, eta):
	pass

	@torch.no_grad()
	def decode_img(self, vae, x0):
	x0 = vae.decoder(x0)
	res = x0.cpu().numpy()
	if vae.middle_c == 8:
	res = (res + 1) * 127.5
	else:
	res = res * 255
	res = np.transpose(res, [0, 2, 3, 1]) # RGB
	res = np.clip(res, 0, 255)
	res = np.array(res, dtype=np.uint8)
	return res

	@torch.no_grad()
	def encode_img(self, vae, x0):
	mu, _ = vae.encoder(x0)
	return mu


	class DDIMSampler(Sampler):
	def __init__(self, device, normal_t):
	super(DDIMSampler, self).__init__(device, normal_t)

	# self.afas_cumprod, self.betas = self.get_afa_bars("scaled_linear",
	# self.total_step) # cosine,linear,scaled_linear
	# self.afas_cumprod = torch.Tensor(self.afas_cumprod).to(self.device)
	# self.betas = torch.Tensor(self.betas).to(self.device)

	@torch.no_grad()
	def sample(self, model, x, t, next_t, eta):
	"""

	:param model:
	:param x:
	:param t: 属于[1,1000]
	:return:
	"""
	t_ = torch.ones((x.shape[0], 1)) * t
	t_ = t_.to(self.device)
	if self.normal_t:
	t_ = t_ / self.total_step
	epsilon = model(x, t_)
	# 把t转成index
	t = int(t - 1)
	next_t = int(next_t - 1)
	if t > 1:
	# pred_x0=(x-sqrt(1-afa_t_bar)ε)/(sqrt(afa_t_bar))
	prede_x0 = (x - torch.sqrt(1 - self.afas_cumprod[t]) * epsilon) / torch.sqrt(self.afas_cumprod[t])
	x_t_1 = torch.sqrt(self.afas_cumprod[next_t]) * prede_x0
	delta = eta * torch.sqrt((1 - self.afas_cumprod[next_t]) / (1 - self.afas_cumprod[t])) * torch.sqrt(
	1 - self.afas_cumprod[t] / self.afas_cumprod[next_t])
	x_t_1 = x_t_1 + torch.sqrt(1 - self.afas_cumprod[next_t] - delta ** 2) * epsilon
	x_t_1 = delta * random_clip(torch.randn_like(x)) + x_t_1
	else:
	coeff = self.betas[t] / (torch.sqrt(1 - self.afas_cumprod[t])) # + 1e-5
	x_t_1 = (1 / torch.sqrt(1 - self.betas[t])) * (x - coeff * epsilon)

	return x_t_1

	@torch.no_grad()
	def sample_loop(self, model, vae_middle_c, batch_size, step, eta, shape=(32, 32)):
	if step < 1000 and False:
	# 分两端均匀取子集
	# 1k步中的前35%用指定推理步数的50%
	big_steps = self.total_step * (1 - 0.4)
	big_ = int(step * 0.6)
	steps = np.linspace(self.total_step, big_steps, big_)
	steps = np.concatenate([steps, np.linspace(big_steps + int(steps[1] - steps[0]), 1, step - big_)],
	axis=0)
	else:
	# 均匀取子集
	steps = np.linspace(self.total_step, 1, step)
	steps = np.floor(steps)
	steps = np.concatenate((steps, steps[-1:]), axis=0)

	x_t = random_clip(torch.randn((batch_size, vae_middle_c, *shape))).to(self.device) # 32, 32
	for i in range(len(steps) - 1):
	x_t = self.sample(model, x_t, steps[i], steps[i + 1], eta)

	yield x_t

	@torch.no_grad()
	def sample_loop_img2img(self, input_img_latents, noise_steps, model, vae_middle_c, batch_size, step, eta):
	noised_latents = self.apple_noise(input_img_latents, noise_steps) # (1,4,32,32)
	step = min(noise_steps, step)
	if step < 1000 and False:
	# 分两端均匀取子集
	# 1k步中的前20%用指定推理步数的50%
	big_steps = noise_steps * (1 - 0.3)
	big_ = int(step * 0.5)
	steps = np.linspace(noise_steps, big_steps, big_)
	steps = np.concatenate([steps, np.linspace(big_steps + int(steps[1] - steps[0]), 1, step - big_)],
	axis=0)
	else:
	# 均匀取子集
	steps = np.linspace(noise_steps, 1, step)
	steps = np.floor(steps)
	steps = np.concatenate((steps, steps[-1:]), axis=0)

	x_t = torch.tile(noised_latents, (batch_size, 1, 1, 1)).to(self.device) # 32, 32
	for i in trange(len(steps) - 1):
	x_t = self.sample(model, x_t, steps[i], steps[i + 1], eta)

	yield x_t


	class EulerDpmppSampler(Sampler):
	def __init__(self, device, normal_t):
	super(EulerDpmppSampler, self).__init__(device, normal_t)
	self.sample_fun = self.sample_dpmpp_2m

	@staticmethod
	def append_zero(x):
	return torch.cat([x, x.new_zeros([1])])

	# 4e-5 0.99
	@staticmethod
	def get_sigmas_karras(n, sigma_min, sigma_max, rho=7., device='cuda'):
	"""Constructs the noise schedule of Karras et al. (2022)."""
	ramp = torch.linspace(0, 1, n)
	min_inv_rho = sigma_min ** (1 / rho)
	max_inv_rho = sigma_max ** (1 / rho)
	sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
	return EulerDpmppSampler.append_zero(sigmas).to(device)

	@staticmethod
	def default_noise_sampler(x):
	return lambda sigma, sigma_next: torch.randn_like(x)

	@staticmethod
	def get_ancestral_step(sigma_from, sigma_to, eta=1.):
	"""Calculates the noise level (sigma_down) to step down to and the amount
	of noise to add (sigma_up) when doing an ancestral sampling step."""
	if not eta:
	return sigma_to, 0.
	sigma_up = min(sigma_to, eta * (sigma_to ** 2 * (sigma_from 2 - sigma_to 2) / sigma_from 2) 0.5)
	sigma_down = (sigma_to 2 - sigma_up 2) ** 0.5
	return sigma_down, sigma_up

	@staticmethod
	def append_dims(x, target_dims):
	"""Appends dimensions to the end of a tensor until it has target_dims dimensions."""
	dims_to_append = target_dims - x.ndim
	if dims_to_append < 0:
	raise ValueError(f'input has {x.ndim} dims but target_dims is {target_dims}, which is less')
	return x[(...,) + (None,) * dims_to_append]

	@staticmethod
	def to_d(x, sigma, denoised):
	"""Converts a denoiser output to a Karras ODE derivative."""
	return (x - denoised) / EulerDpmppSampler.append_dims(sigma, x.ndim)

	@staticmethod
	def to_denoised(x, sigma, d):
	return x - d * EulerDpmppSampler.append_dims(sigma, x.ndim)

	@torch.no_grad()
	def sample_euler_ancestral(self, model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1.,
	noise_sampler=None):
	"""Ancestral sampling with Euler method steps."""
	extra_args = {} if extra_args is None else extra_args
	noise_sampler = EulerDpmppSampler.default_noise_sampler(x) if noise_sampler is None else noise_sampler
	s_in = x.new_ones([x.shape[0], 1])
	for i in trange(len(sigmas) - 1, disable=disable):
	t = sigmas[i] * (1 - 1 / self.total_step) + 1 / self.total_step
	t = torch.floor(t * self.total_step) # 不归一化t需要输入整数

	afa_bar_t = self.afas_cumprod[int(t) - 1] # 获得加噪用的afa bar
	if self.normal_t:
	t = t / self.total_step

	t = t * s_in
	output = model(x, t, **extra_args)
	denoised = (x - torch.sqrt(1 - afa_bar_t) * output) / torch.sqrt(afa_bar_t)

	sigma_down, sigma_up = self.get_ancestral_step(sigmas[i], sigmas[i + 1], eta=eta)
	if callback is not None:
	callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised})
	d = self.to_d(x, sigmas[i], denoised)
	# d = denoised
	# Euler method
	dt = sigma_down - sigmas[i]
	x = x + d * dt
	if sigmas[i + 1] > 0:
	x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * s_noise * sigma_up
	yield x
	# return x

	@torch.no_grad()
	def sample_dpmpp_2m(self, model, x, sigmas, extra_args=None, callback=None, disable=None):
	"""DPM-Solver++(2M)."""
	extra_args = {} if extra_args is None else extra_args
	s_in = x.new_ones([x.shape[0], 1])
	sigma_fn = lambda t: t.neg().exp()
	t_fn = lambda sigma: sigma.log().neg()
	old_denoised = None

	for i in trange(len(sigmas) - 1, disable=disable):
	t = sigmas[i] * (1 - 1 / self.total_step) + 1 / self.total_step
	t = torch.floor(t * self.total_step) # 不归一化t需要输入整数

	afa_bar_t = self.afas_cumprod[int(t) - 1] # 获得加噪用的afa bar
	if self.normal_t:
	t = t / self.total_step

	t = t * s_in
	output = model(x, t, **extra_args)
	denoised = (x - torch.sqrt(1 - afa_bar_t) * output) / torch.sqrt(afa_bar_t)

	if callback is not None:
	callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised})
	t, t_next = t_fn(sigmas[i]), t_fn(sigmas[i + 1])
	h = t_next - t
	if old_denoised is None or sigmas[i + 1] == 0:
	x = (sigma_fn(t_next) / sigma_fn(t)) * x - (-h).expm1() * denoised
	else:
	h_last = t - t_fn(sigmas[i - 1])
	r = h_last / h
	denoised_d = (1 + 1 / (2 * r)) * denoised - (1 / (2 * r)) * old_denoised
	x = (sigma_fn(t_next) / sigma_fn(t)) * x - (-h).expm1() * denoised_d
	old_denoised = denoised
	yield x

	def switch_sampler(self, sampler_name):
	if sampler_name == "euler a":
	self.sample_fun = self.sample_euler_ancestral
	elif sampler_name == "dpmpp 2m":
	self.sample_fun = self.sample_dpmpp_2m
	else:
	self.sample_fun = self.sample_euler_ancestral

	def sample_loop(self, model, vae_middle_c, batch_size, step, eta, shape=(32, 32)):
	x = torch.randn((batch_size, vae_middle_c, 32, 32)).to(device)
	sigmas = self.get_sigmas_karras(step, 1e-5, 0.999, device=device)
	# sigmas = self.get_named_beta_schedule("scaled_linear", step)

	looper = self.sample_fun(unet, x, sigmas)
	for _ in trange(len(sigmas) - 1):
	x_t = next(looper)
	yield x_t


	class PretrainVae:
	def __init__(self, device):
	from diffusers import AutoencoderKL, DiffusionPipeline
	self.vae = AutoencoderKL.from_pretrained("gsdf/Counterfeit-V2.5", # segmind/small-sd
	subfolder="vae",
	cache_dir="./vae/pretrain_vae").to(device)
	self.vae.requires_grad_(False)
	self.middle_c = 4
	self.vae_scaleing = 0.18215

	def encoder(self, x):
	latents = self.vae.encode(x)
	latents = latents.latent_dist
	mean = latents.mean * self.vae_scaleing
	var = latents.var * self.vae_scaleing
	return mean, var

	def decoder(self, latents):
	latents = latents / self.vae_scaleing
	output = self.vae.decode(latents).sample
	return output

	# 释放encoder
	def res_encoder(self):
	del self.vae.encoder
	torch.cuda.empty_cache()


	# ================================================================

	def merge_images(images: np.ndarray):
	"""
	合并图像
	:param images: 图像数组
	:return: 合并后的图像数组
	"""
	n, h, w, c = images.shape
	nn = int(np.ceil(n ** 0.5))
	merged_image = np.zeros((h * nn, w * nn, 3), dtype=images.dtype)
	for i in range(n):
	row = i // nn
	col = i % nn
	merged_image[row * h:(row + 1) * h, col * w:(col + 1) * w, :] = images[i]

	merged_image = np.clip(merged_image, 0, 255)
	merged_image = np.array(merged_image, dtype=np.uint8)
	return merged_image


	def get_models(device):
	def modelLoad(model, model_path, data_parallel=False):
	model.load_state_dict(torch.load(model_path), strict=True)

	if data_parallel:
	model = torch.nn.DataParallel(model)
	return model

	from net.UNet import UNet
	config = {
	# 模型结构相关
	"en_out_c": (256, 256, 256, 320, 320, 320, 576, 576, 576, 704, 704, 704),
	"en_down": (0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0),
	"en_skip": (0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1),
	"en_att_heads": (8, 8, 8, 0, 8, 8, 0, 8, 8, 0, 8, 8),
	"de_out_c": (704, 576, 576, 576, 320, 320, 320, 256, 256, 256, 256),
	"de_up": ("none", "subpix", "none", "none", "subpix", "none", "none", "subpix", "none", "none", "none"),
	"de_skip": (1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0),
	"de_att_heads": (8, 8, 0, 8, 8, 0, 8, 8, 0, 8, 8), # skip的地方不做self-attention
	"t_out_c": 256,
	"vae_c": 4,
	"block_deep": 3,
	"use_pretrain_vae": True,

	"normal_t": True,

	"model_save_path": "./weight",
	"model_name": "unet",
	"model_tail": "ema",
	}
	print("加载模型...")
	unet = UNet(config["en_out_c"], config["en_down"], config["en_skip"], config["en_att_heads"],
	config["de_out_c"], config["de_up"], config["de_skip"], config["de_att_heads"],
	config["t_out_c"], config["vae_c"], config["block_deep"]).to(device)
	unet = modelLoad(unet, os.path.join(config["model_save_path"],
	f"{config['model_name']}_{config['model_tail']}.pth"))

	vae = PretrainVae(device)
	print("加载完成")
	return unet, vae, config["normal_t"]


	def init_webui(unet, vae, normal_t):
	# 定义回调函数
	def process_image(input_image_value, noise_step, step_value, batch_size, sampler_name, img_size,
	progress=gr.Progress()):
	progress(0, desc="开始...")

	noise_step = float(noise_step)
	step_value = int(step_value)
	batch_size = int(batch_size)
	img_size = int(img_size) // 8
	img_size = (img_size, img_size)

	if sampler_name == "DDIM":
	sampler = DDIMSampler(device, normal_t)
	elif sampler_name == "euler a" or sampler_name == "dpmpp 2m":
	sampler = EulerDpmppSampler(device, normal_t)
	sampler.switch_sampler(sampler_name)
	else:
	raise ValueError(f"Unknow sampler_name: {sampler_name}")
	if input_image_value is None:
	looper = sampler.sample_loop(unet, vae.middle_c, batch_size, step_value, shape=img_size, eta=1.)
	else:
	input_image_value = Image.fromarray(input_image_value).resize(img_size, Image.ANTIALIAS)
	input_image_value = np.array(input_image_value, dtype=np.float32) / 255.
	input_image_value = np.transpose(input_image_value, (2, 0, 1))
	input_image_value = torch.Tensor([input_image_value]).to(device)
	input_img_latents = sampler.encode_img(vae, input_image_value)
	looper = sampler.sample_loop_img2img(input_img_latents,
	int(noise_step * sampler.total_step),
	unet,
	vae.middle_c,
	batch_size,
	step_value,
	eta=1.)
	for i in progress.tqdm(range(1, step_value + 1)):
	output = next(looper)

	output = sampler.decode_img(vae, output)
	output = np.clip(output, 0, 255)
	marge_img = merge_images(output)

	output = [marge_img] + list(output)

	return output

	with gr.Blocks(title="图片处理") as iface:
	with gr.Column():
	with gr.Row():
	with gr.Column():
	# 创建输入组件
	input_image = gr.Image(label="输入图片")
	# 加噪程度
	noise_step = gr.Slider(minimum=0.05, maximum=1, value=0.6, label="加噪程度", step=0.01)
	with gr.Column():
	# 选择sampler
	sampler_name = gr.Dropdown(["DDIM"], label="sampler", value="DDIM") # , "euler a", "dpmpp 2m"
	# 创建滑动条组件
	step = gr.Slider(minimum=1, maximum=1000, value=40, label="步长", step=1)
	batch_size = gr.Slider(minimum=1, maximum=4, label="batch size", step=1)
	img_size = gr.Slider(minimum=256, maximum=512, value=256, label="img size", step=64)
	# 创建开始按钮组件
	start_button = gr.Button(label="开始")
	# 创建输出组件
	output_images = gr.Gallery(show_label=False, height=400, columns=5)

	start_button.click(process_image, [input_image, noise_step, step, batch_size, sampler_name, img_size],
	[output_images])

	return iface


	if __name__ == '__main__':
	device = torch.device('cpu')
	unet, vae, normal_t = get_models(device)


	def run_with_ui(unet, vae, normal_t):
	# 创建界面
	iface = init_webui(unet, vae, normal_t)

	# 运行界面
	iface.queue().launch() #


	run_with_ui(unet, vae, normal_t)