xco2 committed
Commit
ebfe12f
1 Parent(s): 98545cc
app.py ADDED
@@ -0,0 +1,546 @@
import random

import gradio as gr
import time, os
import numpy as np
import torch
from tqdm import tqdm, trange
from PIL import Image


def random_clip(x, min=-1.5, max=1.5):
    if isinstance(x, np.ndarray):
        return np.clip(x, min, max)
    elif isinstance(x, torch.Tensor):
        return torch.clip(x, min, max)
    else:
        raise TypeError(f"type of x is {type(x)}")


class Sampler:
    def __init__(self, device, normal_t):
        self.device = device
        self.total_step = 1000
        self.normal_t = normal_t

        self.afas_cumprod, self.betas = self.get_afa_bars("scaled_linear",  # cosine, linear, scaled_linear
                                                          self.total_step)
        self.afas_cumprod = torch.Tensor(self.afas_cumprod).to(self.device)
        self.betas = torch.Tensor(self.betas).to(self.device)

    def betas_for_alpha_bar(self, num_diffusion_timesteps, alpha_bar, max_beta=0.999):
        """
        Create a beta schedule that discretizes the given alpha_t_bar function,
        which defines the cumulative product of (1-beta) over time from t = [0,1].

        :param num_diffusion_timesteps: the number of betas to produce.
        :param alpha_bar: a lambda that takes an argument t from 0 to 1 and
                          produces the cumulative product of (1-beta) up to that
                          part of the diffusion process.
        :param max_beta: the maximum beta to use; use values lower than 1 to
                         prevent singularities.
        """
        betas = []
        for i in range(num_diffusion_timesteps):
            t1 = i / num_diffusion_timesteps
            t2 = (i + 1) / num_diffusion_timesteps
            betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta))
        return np.array(betas)

    def get_named_beta_schedule(self, schedule_name, num_diffusion_timesteps):
        """
        Get a pre-defined beta schedule for the given name.

        The beta schedule library consists of beta schedules which remain similar
        in the limit of num_diffusion_timesteps.
        Beta schedules may be added, but should not be removed or changed once
        they are committed to maintain backwards compatibility.
        """
        if schedule_name == "linear":
            # Linear schedule from Ho et al., extended to work for any number of
            # diffusion steps.
            scale = 1000 / num_diffusion_timesteps
            beta_start = scale * 0.0001
            beta_end = scale * 0.02
            return np.linspace(
                beta_start, beta_end, num_diffusion_timesteps, dtype=np.float64
            )
        elif schedule_name == "scaled_linear":
            scale = 1000 / num_diffusion_timesteps
            beta_start = scale * 0.0001
            beta_end = scale * 0.02
            return np.linspace(
                beta_start ** 0.5, beta_end ** 0.5, num_diffusion_timesteps, dtype=np.float64) ** 2
        elif schedule_name == "cosine":
            return self.betas_for_alpha_bar(
                num_diffusion_timesteps,
                lambda t: np.cos((t + 0.008) / 1.008 * np.pi / 2) ** 2,
            )
        else:
            raise NotImplementedError(f"unknown beta schedule: {schedule_name}")

    def get_afa_bars(self, beta_schedule_name, total_step):
        """
        Build the list of cumulative alpha-bar values; the list has length total_step.
        :param beta_schedule_name: name of the beta schedule
        :return: afa_bars and betas
        """
        betas = self.get_named_beta_schedule(schedule_name=beta_schedule_name,
                                             num_diffusion_timesteps=total_step)

        afas = 1 - betas
        afas_cumprod = np.cumprod(afas)
        return afas_cumprod, betas

    # Sampling that starts from pure noise
    @torch.no_grad()
    def sample_loop(self, model, vae_middle_c, batch_size, step, eta, shape=(32, 32)):
        pass

    def apple_noise(self, data, step):
        """
        Add noise to a latent and return x_t.
        :param data: latent-space data
        :param step: the chosen diffusion step
        :return: the noised latent x_t
        """
        data = data.to(self.device)

        noise = torch.randn(size=data.shape).to(self.device)
        afa_bar_t = self.afas_cumprod[step - 1]
        x_t = torch.sqrt(afa_bar_t) * data + torch.sqrt(1 - afa_bar_t) * noise
        return x_t

    # Image-to-image sampling
    @torch.no_grad()
    def sample_loop_img2img(self, input_img, model, vae_middle_c, batch_size, step, eta):
        pass

    @torch.no_grad()
    def decode_img(self, vae, x0):
        x0 = vae.decoder(x0)
        res = x0.cpu().numpy()
        if vae.middle_c == 8:
            res = (res + 1) * 127.5
        else:
            res = res * 255
        res = np.transpose(res, [0, 2, 3, 1])  # RGB
        res = np.clip(res, 0, 255)
        res = np.array(res, dtype=np.uint8)
        return res

    @torch.no_grad()
    def encode_img(self, vae, x0):
        mu, _ = vae.encoder(x0)
        return mu

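# Illustrative sketch (not called by the app): apple_noise implements the closed-form
# DDPM forward marginal x_t = sqrt(a_bar_t) * x_0 + sqrt(1 - a_bar_t) * eps, so a
# unit-variance latent stays roughly unit variance at every step. The CPU device and
# (1, 4, 32, 32) shape below are arbitrary assumptions for the demo.
def _demo_forward_noising():
    sampler = Sampler(device="cpu", normal_t=True)
    x0 = torch.randn(1, 4, 32, 32)
    for step in (1, 250, 500, 1000):
        x_t = sampler.apple_noise(x0, step)
        print(step, float(sampler.afas_cumprod[step - 1]), float(x_t.std()))
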
class DDIMSampler(Sampler):
    def __init__(self, device, normal_t):
        super(DDIMSampler, self).__init__(device, normal_t)

    @torch.no_grad()
    def sample(self, model, x, t, next_t, eta):
        """
        One DDIM update from step t to step next_t.
        :param model: noise-prediction model
        :param x: current latent x_t
        :param t: current step, in [1, 1000]
        :return: the latent at next_t
        """
        t_ = torch.ones((x.shape[0], 1)) * t
        t_ = t_.to(self.device)
        if self.normal_t:
            t_ = t_ / self.total_step
        epsilon = model(x, t_)
        # Convert the step numbers into array indices
        t = int(t - 1)
        next_t = int(next_t - 1)
        if t > 1:
            # pred_x0 = (x - sqrt(1 - afa_t_bar) * eps) / sqrt(afa_t_bar)
            prede_x0 = (x - torch.sqrt(1 - self.afas_cumprod[t]) * epsilon) / torch.sqrt(self.afas_cumprod[t])
            x_t_1 = torch.sqrt(self.afas_cumprod[next_t]) * prede_x0
            delta = eta * torch.sqrt((1 - self.afas_cumprod[next_t]) / (1 - self.afas_cumprod[t])) * torch.sqrt(
                1 - self.afas_cumprod[t] / self.afas_cumprod[next_t])
            x_t_1 = x_t_1 + torch.sqrt(1 - self.afas_cumprod[next_t] - delta ** 2) * epsilon
            x_t_1 = delta * random_clip(torch.randn_like(x)) + x_t_1
        else:
            coeff = self.betas[t] / (torch.sqrt(1 - self.afas_cumprod[t]))
            x_t_1 = (1 / torch.sqrt(1 - self.betas[t])) * (x - coeff * epsilon)

        return x_t_1

    @torch.no_grad()
    def sample_loop(self, model, vae_middle_c, batch_size, step, eta, shape=(32, 32)):
        if step < 1000 and False:
            # Take the sub-sequence in two uneven segments:
            # the first 35% of the 1k steps gets 50% of the requested inference steps
            big_steps = self.total_step * (1 - 0.4)
            big_ = int(step * 0.6)
            steps = np.linspace(self.total_step, big_steps, big_)
            steps = np.concatenate([steps, np.linspace(big_steps + int(steps[1] - steps[0]), 1, step - big_)],
                                   axis=0)
        else:
            # Take a uniformly spaced sub-sequence of the timesteps
            steps = np.linspace(self.total_step, 1, step)
        steps = np.floor(steps)
        steps = np.concatenate((steps, steps[-1:]), axis=0)

        x_t = random_clip(torch.randn((batch_size, vae_middle_c, *shape))).to(self.device)  # 32, 32
        for i in range(len(steps) - 1):
            x_t = self.sample(model, x_t, steps[i], steps[i + 1], eta)

            yield x_t

    @torch.no_grad()
    def sample_loop_img2img(self, input_img_latents, noise_steps, model, vae_middle_c, batch_size, step, eta):
        noised_latents = self.apple_noise(input_img_latents, noise_steps)  # (1, 4, 32, 32)
        step = min(noise_steps, step)
        if step < 1000 and False:
            # Take the sub-sequence in two uneven segments:
            # the first 20% of the 1k steps gets 50% of the requested inference steps
            big_steps = noise_steps * (1 - 0.3)
            big_ = int(step * 0.5)
            steps = np.linspace(noise_steps, big_steps, big_)
            steps = np.concatenate([steps, np.linspace(big_steps + int(steps[1] - steps[0]), 1, step - big_)],
                                   axis=0)
        else:
            # Take a uniformly spaced sub-sequence of the timesteps
            steps = np.linspace(noise_steps, 1, step)
        steps = np.floor(steps)
        steps = np.concatenate((steps, steps[-1:]), axis=0)

        x_t = torch.tile(noised_latents, (batch_size, 1, 1, 1)).to(self.device)  # 32, 32
        for i in trange(len(steps) - 1):
            x_t = self.sample(model, x_t, steps[i], steps[i + 1], eta)

            yield x_t

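# Illustrative sketch (not called by the app): sample_loop walks a uniformly spaced
# sub-sequence of the 1000 training steps and repeats the last step once, so asking
# for `step` inference steps produces exactly `step` (t, next_t) pairs and yields.
def _demo_ddim_step_schedule(step=10, total_step=1000):
    steps = np.floor(np.linspace(total_step, 1, step))
    steps = np.concatenate((steps, steps[-1:]), axis=0)
    print(len(steps) - 1, steps)  # 10 update pairs for 10 requested inference steps
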
class EulerDpmppSampler(Sampler):
    def __init__(self, device, normal_t):
        super(EulerDpmppSampler, self).__init__(device, normal_t)
        self.sample_fun = self.sample_dpmpp_2m

    @staticmethod
    def append_zero(x):
        return torch.cat([x, x.new_zeros([1])])

    # 4e-5 0.99
    @staticmethod
    def get_sigmas_karras(n, sigma_min, sigma_max, rho=7., device='cuda'):
        """Constructs the noise schedule of Karras et al. (2022)."""
        ramp = torch.linspace(0, 1, n)
        min_inv_rho = sigma_min ** (1 / rho)
        max_inv_rho = sigma_max ** (1 / rho)
        sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
        return EulerDpmppSampler.append_zero(sigmas).to(device)

    @staticmethod
    def default_noise_sampler(x):
        return lambda sigma, sigma_next: torch.randn_like(x)

    @staticmethod
    def get_ancestral_step(sigma_from, sigma_to, eta=1.):
        """Calculates the noise level (sigma_down) to step down to and the amount
        of noise to add (sigma_up) when doing an ancestral sampling step."""
        if not eta:
            return sigma_to, 0.
        sigma_up = min(sigma_to, eta * (sigma_to ** 2 * (sigma_from ** 2 - sigma_to ** 2) / sigma_from ** 2) ** 0.5)
        sigma_down = (sigma_to ** 2 - sigma_up ** 2) ** 0.5
        return sigma_down, sigma_up

    @staticmethod
    def append_dims(x, target_dims):
        """Appends dimensions to the end of a tensor until it has target_dims dimensions."""
        dims_to_append = target_dims - x.ndim
        if dims_to_append < 0:
            raise ValueError(f'input has {x.ndim} dims but target_dims is {target_dims}, which is less')
        return x[(...,) + (None,) * dims_to_append]

    @staticmethod
    def to_d(x, sigma, denoised):
        """Converts a denoiser output to a Karras ODE derivative."""
        return (x - denoised) / EulerDpmppSampler.append_dims(sigma, x.ndim)

    @staticmethod
    def to_denoised(x, sigma, d):
        return x - d * EulerDpmppSampler.append_dims(sigma, x.ndim)

    @torch.no_grad()
    def sample_euler_ancestral(self, model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1.,
                               noise_sampler=None):
        """Ancestral sampling with Euler method steps."""
        extra_args = {} if extra_args is None else extra_args
        noise_sampler = EulerDpmppSampler.default_noise_sampler(x) if noise_sampler is None else noise_sampler
        s_in = x.new_ones([x.shape[0], 1])
        for i in trange(len(sigmas) - 1, disable=disable):
            t = sigmas[i] * (1 - 1 / self.total_step) + 1 / self.total_step
            t = torch.floor(t * self.total_step)  # the model expects an integer step when t is not normalised

            afa_bar_t = self.afas_cumprod[int(t) - 1]  # alpha-bar used for the noising at this step
            if self.normal_t:
                t = t / self.total_step

            t = t * s_in
            output = model(x, t, **extra_args)
            denoised = (x - torch.sqrt(1 - afa_bar_t) * output) / torch.sqrt(afa_bar_t)

            sigma_down, sigma_up = self.get_ancestral_step(sigmas[i], sigmas[i + 1], eta=eta)
            if callback is not None:
                callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised})
            d = self.to_d(x, sigmas[i], denoised)
            # Euler method
            dt = sigma_down - sigmas[i]
            x = x + d * dt
            if sigmas[i + 1] > 0:
                x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * s_noise * sigma_up
            yield x

    @torch.no_grad()
    def sample_dpmpp_2m(self, model, x, sigmas, extra_args=None, callback=None, disable=None):
        """DPM-Solver++(2M)."""
        extra_args = {} if extra_args is None else extra_args
        s_in = x.new_ones([x.shape[0], 1])
        sigma_fn = lambda t: t.neg().exp()
        t_fn = lambda sigma: sigma.log().neg()
        old_denoised = None

        for i in trange(len(sigmas) - 1, disable=disable):
            t = sigmas[i] * (1 - 1 / self.total_step) + 1 / self.total_step
            t = torch.floor(t * self.total_step)  # the model expects an integer step when t is not normalised

            afa_bar_t = self.afas_cumprod[int(t) - 1]  # alpha-bar used for the noising at this step
            if self.normal_t:
                t = t / self.total_step

            t = t * s_in
            output = model(x, t, **extra_args)
            denoised = (x - torch.sqrt(1 - afa_bar_t) * output) / torch.sqrt(afa_bar_t)

            if callback is not None:
                callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised})
            t, t_next = t_fn(sigmas[i]), t_fn(sigmas[i + 1])
            h = t_next - t
            if old_denoised is None or sigmas[i + 1] == 0:
                x = (sigma_fn(t_next) / sigma_fn(t)) * x - (-h).expm1() * denoised
            else:
                h_last = t - t_fn(sigmas[i - 1])
                r = h_last / h
                denoised_d = (1 + 1 / (2 * r)) * denoised - (1 / (2 * r)) * old_denoised
                x = (sigma_fn(t_next) / sigma_fn(t)) * x - (-h).expm1() * denoised_d
            old_denoised = denoised
            yield x

    def switch_sampler(self, sampler_name):
        if sampler_name == "euler a":
            self.sample_fun = self.sample_euler_ancestral
        elif sampler_name == "dpmpp 2m":
            self.sample_fun = self.sample_dpmpp_2m
        else:
            self.sample_fun = self.sample_euler_ancestral

    def sample_loop(self, model, vae_middle_c, batch_size, step, eta, shape=(32, 32)):
        x = torch.randn((batch_size, vae_middle_c, *shape)).to(self.device)
        sigmas = self.get_sigmas_karras(step, 1e-5, 0.999, device=self.device)

        looper = self.sample_fun(model, x, sigmas)
        for _ in trange(len(sigmas) - 1):
            x_t = next(looper)
            yield x_t

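# Illustrative sketch (not called by the app): get_sigmas_karras interpolates between
# sigma_max and sigma_min in rho-warped space and appends a trailing zero, so `n`
# steps produce n + 1 sigmas; the 1e-5 / 0.999 bounds mirror the ones sample_loop uses.
def _demo_karras_sigmas(n=10):
    sigmas = EulerDpmppSampler.get_sigmas_karras(n, 1e-5, 0.999, device="cpu")
    print(sigmas.shape, sigmas)
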
class PretrainVae:
    def __init__(self, device):
        from diffusers import AutoencoderKL, DiffusionPipeline
        self.vae = AutoencoderKL.from_pretrained("gsdf/Counterfeit-V2.5",  # segmind/small-sd
                                                 subfolder="vae",
                                                 cache_dir="./vae/pretrain_vae").to(device)
        self.vae.requires_grad_(False)
        self.middle_c = 4
        self.vae_scaleing = 0.18215

    def encoder(self, x):
        latents = self.vae.encode(x)
        latents = latents.latent_dist
        mean = latents.mean * self.vae_scaleing
        var = latents.var * self.vae_scaleing
        return mean, var

    def decoder(self, latents):
        latents = latents / self.vae_scaleing
        output = self.vae.decode(latents).sample
        return output

    # Free the encoder to save memory
    def res_encoder(self):
        del self.vae.encoder
        torch.cuda.empty_cache()

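# Illustrative sketch (not called by the app, and it downloads the Counterfeit-V2.5
# VAE weights on first use): an encode/decode round-trip through the pretrained VAE.
# The CPU device and 256x256 input size are assumptions matching the app defaults.
def _demo_vae_roundtrip():
    vae = PretrainVae(device="cpu")
    img = torch.rand(1, 3, 256, 256)     # dummy RGB image
    mean, var = vae.encoder(img)          # latent is (1, 4, 32, 32) after 8x downsampling
    recon = vae.decoder(mean)
    print(mean.shape, recon.shape)
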
# ================================================================

def merge_images(images: np.ndarray):
    """
    Merge a batch of images into one tiled image.
    :param images: image array, shaped (N, H, W, C)
    :return: the merged image array
    """
    n, h, w, c = images.shape
    nn = int(np.ceil(n ** 0.5))
    merged_image = np.zeros((h * nn, w * nn, 3), dtype=images.dtype)
    for i in range(n):
        row = i // nn
        col = i % nn
        merged_image[row * h:(row + 1) * h, col * w:(col + 1) * w, :] = images[i]

    merged_image = np.clip(merged_image, 0, 255)
    merged_image = np.array(merged_image, dtype=np.uint8)
    return merged_image

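# Illustrative sketch (not called by the app): merge_images tiles N images onto a
# ceil(sqrt(N)) x ceil(sqrt(N)) grid, so three 64x64 images land on a 128x128 canvas.
def _demo_merge_images():
    imgs = np.random.randint(0, 255, size=(3, 64, 64, 3), dtype=np.uint8)
    grid = merge_images(imgs)
    print(grid.shape)  # (128, 128, 3)
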
def get_models(device):
    def modelLoad(model, model_path, data_parallel=False):
        model.load_state_dict(torch.load(model_path), strict=True)

        if data_parallel:
            model = torch.nn.DataParallel(model)
        return model

    from net.UNet import UNet
    config = {
        # Model architecture
        "en_out_c": (256, 256, 256, 320, 320, 320, 576, 576, 576, 704, 704, 704),
        "en_down": (0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0),
        "en_skip": (0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1),
        "en_att_heads": (8, 8, 8, 0, 8, 8, 0, 8, 8, 0, 8, 8),
        "de_out_c": (704, 576, 576, 576, 320, 320, 320, 256, 256, 256, 256),
        "de_up": ("none", "subpix", "none", "none", "subpix", "none", "none", "subpix", "none", "none", "none"),
        "de_skip": (1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0),
        "de_att_heads": (8, 8, 0, 8, 8, 0, 8, 8, 0, 8, 8),  # no self-attention where there is a skip connection
        "t_out_c": 256,
        "vae_c": 4,
        "block_deep": 3,
        "use_pretrain_vae": True,

        "normal_t": True,

        "model_save_path": "./weight",
        "model_name": "unet",
        "model_tail": "ema",
    }
    print("Loading models...")
    unet = UNet(config["en_out_c"], config["en_down"], config["en_skip"], config["en_att_heads"],
                config["de_out_c"], config["de_up"], config["de_skip"], config["de_att_heads"],
                config["t_out_c"], config["vae_c"], config["block_deep"]).to(device)
    unet = modelLoad(unet, os.path.join(config["model_save_path"],
                                        f"{config['model_name']}_{config['model_tail']}.pth"))

    vae = PretrainVae(device)
    print("Models loaded")
    return unet, vae, config["normal_t"]


def init_webui(unet, vae, normal_t):
    # Callback run when the start button is clicked
    def process_image(input_image_value, noise_step, step_value, batch_size, sampler_name, img_size,
                      progress=gr.Progress()):
        progress(0, desc="Starting...")

        noise_step = float(noise_step)
        step_value = int(step_value)
        batch_size = int(batch_size)
        img_size = int(img_size) // 8
        img_size = (img_size, img_size)

        if sampler_name == "DDIM":
            sampler = DDIMSampler(device, normal_t)
        elif sampler_name == "euler a" or sampler_name == "dpmpp 2m":
            sampler = EulerDpmppSampler(device, normal_t)
            sampler.switch_sampler(sampler_name)
        else:
            raise ValueError(f"Unknown sampler_name: {sampler_name}")
        if input_image_value is None:
            looper = sampler.sample_loop(unet, vae.middle_c, batch_size, step_value, shape=img_size, eta=1.)
        else:
            input_image_value = Image.fromarray(input_image_value).resize(img_size, Image.LANCZOS)
            input_image_value = np.array(input_image_value, dtype=np.float32) / 255.
            input_image_value = np.transpose(input_image_value, (2, 0, 1))
            input_image_value = torch.Tensor([input_image_value]).to(device)
            input_img_latents = sampler.encode_img(vae, input_image_value)
            looper = sampler.sample_loop_img2img(input_img_latents,
                                                 int(noise_step * sampler.total_step),
                                                 unet,
                                                 vae.middle_c,
                                                 batch_size,
                                                 step_value,
                                                 eta=1.)
        for i in progress.tqdm(range(1, step_value + 1)):
            output = next(looper)

        output = sampler.decode_img(vae, output)
        output = np.clip(output, 0, 255)
        marge_img = merge_images(output)

        output = [marge_img] + list(output)

        return output

    with gr.Blocks(title="Image generation") as iface:
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    # Input components
                    input_image = gr.Image(label="Input image")
                    # How much noise to add for img2img
                    noise_step = gr.Slider(minimum=0.05, maximum=1, value=0.6, label="Noise strength", step=0.01)
                with gr.Column():
                    # Sampler selection
                    sampler_name = gr.Dropdown(["DDIM"], label="sampler", value="DDIM")  # , "euler a", "dpmpp 2m"
                    # Sliders
                    step = gr.Slider(minimum=1, maximum=1000, value=400, label="Steps", step=1)
                    batch_size = gr.Slider(minimum=1, maximum=4, label="batch size", step=1)
                    img_size = gr.Slider(minimum=256, maximum=512, value=256, label="img size", step=64)
                    # Start button
                    start_button = gr.Button("Start")
            # Output gallery
            output_images = gr.Gallery(show_label=False, height=400, columns=5)

        start_button.click(process_image, [input_image, noise_step, step, batch_size, sampler_name, img_size],
                           [output_images])

    return iface


if __name__ == '__main__':
    device = "cuda"
    unet, vae, normal_t = get_models(device)


    def run_with_ui(unet, vae, normal_t):
        # Build the interface
        iface = init_webui(unet, vae, normal_t)

        # Launch the interface
        iface.queue().launch()


    run_with_ui(unet, vae, normal_t)
net/UNet.py ADDED
@@ -0,0 +1,520 @@
"""
att_uncontrol9_adam and all earlier runs use this UNet definition.

"""

import numpy as np
import torch
import torch.nn as nn
import math


class SubPixelConv(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, scale_factor=2):
        super(SubPixelConv, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels * scale_factor ** 2, kernel_size, stride,
                              padding=kernel_size // 2)
        self.pixel_shuffle = nn.PixelShuffle(scale_factor)

    def forward(self, x):
        x = self.conv(x)
        x = self.pixel_shuffle(x)
        return x


class Swish(nn.Module):
    def __init__(self):
        super(Swish, self).__init__()

    def forward(self, x):
        # swish
        return x * torch.sigmoid(x)


def zero_module(module):
    """
    Zero out the parameters of a module and return it.
    """
    for p in module.parameters():
        p.detach().zero_()
    return module


class AttentionBlock(nn.Module):
    """
    An attention block that allows spatial positions to attend to each other.

    Originally ported from here, but adapted to the N-d case.
    https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/models/unet.py#L66.
    """

    def __init__(self, channels, num_heads=-1, use_checkpoint=False):
        super().__init__()
        self.channels = channels
        self.num_heads = num_heads if num_heads != -1 else min(channels // 32, 8)
        self.use_checkpoint = use_checkpoint

        self.norm = nn.GroupNorm(16, channels, eps=1e-6)
        self.qkv = nn.Conv1d(channels, channels * 3, 1)
        self.attention = QKVAttention()
        self.proj_out = zero_module(nn.Conv1d(channels, channels, 1))

    def forward(self, x):
        b, c, *spatial = x.shape
        x = x.reshape(b, c, -1)
        qkv = self.qkv(self.norm(x))
        qkv = qkv.reshape(b * self.num_heads, -1, qkv.shape[2])
        h = self.attention(qkv)
        h = h.reshape(b, -1, h.shape[-1])
        h = self.proj_out(h)
        return (x + h).reshape(b, c, *spatial)


class QKVAttention(nn.Module):
    """
    A module which performs QKV attention.
    """

    def forward(self, qkv):
        """
        Apply QKV attention.

        :param qkv: an [N x (C * 3) x T] tensor of Qs, Ks, and Vs.
        :return: an [N x C x T] tensor after attention.
        """
        ch = qkv.shape[1] // 3
        q, k, v = torch.split(qkv, ch, dim=1)
        scale = 1 / math.sqrt(math.sqrt(ch))
        weight = torch.einsum(
            "bct,bcs->bts", q * scale, k * scale
        )  # More stable with f16 than dividing afterwards
        weight = torch.softmax(weight.float(), dim=-1).type(weight.dtype)
        return torch.einsum("bts,bcs->bct", weight, v)

    @staticmethod
    def count_flops(model, _x, y):
        """
        A counter for the `thop` package to count the operations in an
        attention operation.

        Meant to be used like:

            macs, params = thop.profile(
                model,
                inputs=(inputs, timestamps),
                custom_ops={QKVAttention: QKVAttention.count_flops},
            )

        """
        b, c, *spatial = y[0].shape
        num_spatial = int(np.prod(spatial))
        # We perform two matmuls with the same number of ops.
        # The first computes the weight matrix, the second computes
        # the combination of the value vectors.
        matmul_ops = 2 * b * (num_spatial ** 2) * c
        model.total_ops += torch.DoubleTensor([matmul_ops])

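# Illustrative sketch (not called by the model): AttentionBlock flattens the spatial
# dims, runs multi-head QKV self-attention over them, and reshapes back, so the output
# shape always matches the input. The 64-channel input below is an arbitrary assumption.
def _demo_attention_shape():
    block = AttentionBlock(channels=64, num_heads=8)
    x = torch.randn(2, 64, 16, 16)
    print(block(x).shape)  # torch.Size([2, 64, 16, 16])
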
# ====================================================================

class TEncoder(nn.Module):
    def __init__(self, out_c=256, scale=30.):
        super(TEncoder, self).__init__()
        # Random Fourier projection (kept for compatibility; unused by the sinusoidal embedding)
        self.out_c = out_c
        self.W = nn.Parameter(torch.randn(out_c // 2) * scale, requires_grad=False)
        self.linear = nn.Sequential(nn.Linear(out_c, out_c),
                                    Swish(),
                                    nn.Linear(out_c, out_c),
                                    )

    def timestep_embedding(self, timesteps, max_period=10000):
        """
        Create sinusoidal timestep embeddings.
        :param timesteps: a 1-D Tensor of N indices, one per batch element.
                          These may be fractional.
        :param max_period: controls the minimum frequency of the embeddings.
        :return: an [N x dim] Tensor of positional embeddings.
        """
        half = self.out_c // 2
        freqs = torch.exp(
            -math.log(max_period) * torch.arange(start=0, end=half, dtype=torch.float32) / half
        ).to(device=timesteps.device)
        args = timesteps[:, None].float() * freqs[None]
        embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1)
        if self.out_c % 2:
            embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1)
        return embedding

    def forward(self, t):
        t_proj = self.timestep_embedding(t)[:, 0, :]
        encoded_t = self.linear(t_proj)
        return encoded_t

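# Illustrative sketch (not called by the model): TEncoder turns a (N, 1) batch of
# timesteps into sinusoidal features and then into an (N, out_c) embedding; the
# normalised t values below mirror what app.py passes when normal_t is True.
def _demo_timestep_embedding():
    enc = TEncoder(out_c=256)
    t = torch.tensor([[0.1], [0.5], [1.0]])
    print(enc.timestep_embedding(t).shape)  # torch.Size([3, 1, 256])
    print(enc(t).shape)                     # torch.Size([3, 256])
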
class EncoderBlock(nn.Module):
    def __init__(self, in_c, out_c, kernel_size, stride, t_in_c, att_num_head=-1, block_deep=4):
        super(EncoderBlock, self).__init__()
        self.in_c = in_c
        self.out_c = out_c
        self.stride = stride
        self.model_list_len = block_deep  # number of convolutions in one block

        padding = kernel_size // 2
        self.model_list = nn.ModuleList()
        self.model_list.append(nn.Sequential(
            nn.Conv2d(in_c, out_c, kernel_size=kernel_size, stride=stride, padding=padding),
            nn.GroupNorm(16, out_c, eps=1e-6),
            Swish()))
        if att_num_head != 0:  # stride == 1
            self.att_block = AttentionBlock(out_c, num_heads=att_num_head)
        else:
            self.att_block = nn.Identity()
        for _ in range(self.model_list_len - 2):  # -2 accounts for the first and last layers
            self.model_list.append(
                nn.Sequential(
                    nn.Conv2d(out_c, out_c, kernel_size=kernel_size, stride=1,
                              padding=padding),
                    nn.GroupNorm(16, out_c, eps=1e-6),
                    Swish(),
                ))
        self.model_list.append(
            nn.Sequential(
                nn.Conv2d(out_c, out_c, kernel_size=kernel_size, stride=1,
                          padding=padding),
                nn.GroupNorm(16, out_c, eps=1e-6),
            ))

        # Timestep-embedding projections, one per conv layer except the last
        self.encode_t = nn.ModuleList(
            [nn.Linear(t_in_c, out_c) for _ in range(len(self.model_list) - 1)])

        if self.in_c != self.out_c or self.stride != 1:
            self.conv_skip = nn.Conv2d(in_c, out_c, kernel_size=1, stride=stride, padding=0)
        else:
            self.conv_skip = nn.Identity()
        self.act_skip = Swish()

    def forward(self, x, t):
        skip = self.conv_skip(x)

        for i, layer in enumerate(self.model_list):
            x = layer(x)
            if i == 0:
                x = self.att_block(x)
            if i < self.model_list_len - 1:
                t_ = self.encode_t[i](t)
                t_ = t_[:, :, None, None]
                x = x + t_

        return self.act_skip(x + skip)


class DecoderBlock(nn.Module):
    def __init__(self, in_c, out_c, kernel_size, upsample="none", t_in_c=256, att_num_head=-1, block_deep=4):
        super(DecoderBlock, self).__init__()
        self.in_c = in_c
        self.out_c = out_c
        self.model_list_len = block_deep  # number of convolutions in one block

        self.model_list = nn.ModuleList()

        if upsample == "subpix":
            self.model_list.append(nn.Sequential(
                SubPixelConv(in_c, out_c, kernel_size=3),
                nn.GroupNorm(16, out_c, eps=1e-6),
                Swish()
            ))

            self.upsample = SubPixelConv(in_c, in_c, kernel_size=3)
        elif upsample == "convt":
            self.model_list.append(nn.Sequential(
                nn.ConvTranspose2d(in_c, out_c, kernel_size=4, stride=2, padding=1),
                nn.GroupNorm(16, out_c, eps=1e-6),
                Swish()
            ))

            self.upsample = nn.ConvTranspose2d(in_c, in_c, kernel_size=4, stride=2, padding=1)
        else:
            self.model_list.append(nn.Sequential(
                nn.Conv2d(in_c, out_c, kernel_size=kernel_size, stride=1,
                          padding=kernel_size // 2),
                nn.GroupNorm(16, out_c, eps=1e-6),
                Swish()
            ))
            self.upsample = nn.Identity()

        if att_num_head != 0:  # upsample != "none"
            self.att_block = AttentionBlock(out_c, num_heads=att_num_head)
        else:
            self.att_block = nn.Identity()

        for _ in range(self.model_list_len - 2):
            self.model_list.append(nn.Sequential(nn.Conv2d(out_c, out_c, kernel_size=kernel_size, stride=1,
                                                           padding=kernel_size // 2),
                                                 nn.GroupNorm(16, out_c, eps=1e-6),
                                                 Swish()))

        self.model_list.append(nn.Sequential(nn.Conv2d(out_c, out_c, kernel_size=kernel_size, stride=1,
                                                       padding=kernel_size // 2),
                                             nn.GroupNorm(16, out_c, eps=1e-6)))

        # Timestep-embedding projections, one per conv layer except the last
        self.encode_t = nn.ModuleList([nn.Linear(t_in_c, out_c) for _ in range(len(self.model_list) - 1)])

        self.conv_skip = nn.Conv2d(in_c, out_c, kernel_size=1, stride=1, padding=0)
        self.act_skip = Swish()

    def forward(self, x, t):
        skip = self.upsample(x)
        skip = self.conv_skip(skip)

        for i, layer in enumerate(self.model_list):
            x = layer(x)
            if i == 0:
                x = self.att_block(x)
            if i < self.model_list_len - 1:
                t_ = self.encode_t[i](t)
                t_ = t_[:, :, None, None]
                x = x + t_

        return self.act_skip(x + skip)


class Encoder(nn.Module):
    def __init__(self,
                 model_in_c=8,
                 out_cs=(64, 64, 128, 128, 256, 256, 512, 512),
                 down_sample=(0, 0, 1, 0, 1, 0, 1, 0),
                 skip_out=(0, 1, 0, 1, 0, 1, 0, 1),
                 att_num_heads=(-1, -1, -1, -1, -1, -1, -1, -1),
                 t_in_c=256,
                 block_deep=4):
        """
        :param out_cs: output channels of each block
        :param down_sample: whether each block downsamples
        :param skip_out: which blocks feed the U-Net skip connections
        """
        super(Encoder, self).__init__()

        self.skip_out = skip_out

        self.model_list = nn.ModuleList()
        for i, (out_c, down, att_num_head) in enumerate(zip(out_cs, down_sample, att_num_heads)):
            in_c = model_in_c if i == 0 else out_cs[i - 1]
            self.model_list.append(
                EncoderBlock(in_c, out_cs[i], kernel_size=3, stride=down + 1, t_in_c=t_in_c,
                             att_num_head=att_num_head, block_deep=block_deep))

    def forward(self, x, t):
        res_x = []
        for i, layer in enumerate(self.model_list):
            x = layer(x, t)
            if self.skip_out[i] == 1:
                res_x.append(x)
        return res_x


class Decoder(nn.Module):
    def __init__(self,
                 in_c,
                 model_out_c=8,
                 out_cs=(512, 256, 256, 128, 128, 64, 64, 32),
                 up_sample=("none", "convt", "none", "subpix", "none", "subpix", "none", "none"),
                 skip_out=(1, 0, 1, 0, 1, 0, 1, 0),
                 att_num_heads=(-1, -1, -1, -1, -1, -1, -1, -1),
                 t_in_c=256,
                 block_deep=4):
        """
        :param out_cs: output channels of each block
        :param up_sample: upsampling method; "none" means no upsampling
        :param skip_out: which blocks consume a U-Net skip connection
        """
        super(Decoder, self).__init__()

        self.skip_out = skip_out
        self.model_list = nn.ModuleList()
        for i, (out_c, up, att_num_head) in enumerate(zip(out_cs, up_sample, att_num_heads)):
            if self.skip_out[i] == 1 and i > 0:
                in_c *= 2
            self.model_list.append(
                DecoderBlock(in_c, out_cs[i], kernel_size=3, upsample=up, t_in_c=t_in_c,
                             att_num_head=att_num_head, block_deep=block_deep))
            in_c = out_cs[i]

        self.Conv1 = nn.Conv2d(out_cs[-1], model_out_c, kernel_size=1, stride=1, padding=0)

    def forward(self, x, t):
        x_list = x
        x = None
        for i, layer in enumerate(self.model_list):
            if self.skip_out[i] == 1:
                if i == 0:
                    x = x_list.pop()
                else:
                    x = torch.cat([x, x_list.pop()], dim=1)
            x = layer(x, t)

        x = self.Conv1(x)
        return x


class UNet(nn.Module):
    def __init__(self,
                 en_out_c,
                 en_down,
                 en_skip,
                 en_att_heads,
                 de_out_c,
                 de_up,
                 de_skip,
                 de_att_heads,
                 t_out_c,
                 vae_c=8,
                 block_deep=4):
        """
        :param en_out_c: encoder parameters
        :param en_down:
        :param en_skip:
        :param de_out_c: decoder parameters
        :param de_up:
        :param de_skip:
        """
        super(UNet, self).__init__()

        self.encoder = Encoder(model_in_c=vae_c,
                               out_cs=en_out_c,
                               down_sample=en_down,
                               skip_out=en_skip,
                               att_num_heads=en_att_heads,
                               t_in_c=t_out_c,
                               block_deep=block_deep)
        self.decoder = Decoder(in_c=en_out_c[-1],
                               model_out_c=vae_c,
                               out_cs=de_out_c,
                               up_sample=de_up,
                               skip_out=de_skip,
                               att_num_heads=de_att_heads,
                               t_in_c=t_out_c,
                               block_deep=block_deep)
        self.t_encoder = TEncoder(t_out_c)

    def forward(self, x, t):
        t = self.t_encoder(t)
        encoder_out = self.encoder(x, t)
        decoder_out = self.decoder(encoder_out, t)
        return decoder_out


if __name__ == '__main__':
    import cv2, os


    def modelSave(model, save_path, save_name):
        if not os.path.exists(save_path):
            os.mkdir(save_path)
        torch.save(model.state_dict(), os.path.join(save_path, save_name))


    def merge_images(images: np.ndarray):
        """
        Merge a batch of images into one tiled image.
        :param images: image array, shaped (N, H, W, C)
        :return: the merged image array
        """
        n, h, w, c = images.shape
        nn = int(np.ceil(n ** 0.5))
        merged_image = np.zeros((h * nn, w * nn, 3), dtype=images.dtype)
        for i in range(n):
            row = i // nn
            col = i % nn
            merged_image[row * h:(row + 1) * h, col * w:(col + 1) * w, :] = images[i]

        merged_image = np.clip(merged_image, 0, 255)
        merged_image = np.array(merged_image, dtype=np.uint8)
        return merged_image


    # 320, 448, 576, 832
    config = {  # model architecture
        "en_out_c": (256, 256, 256, 320, 320, 320, 576, 576, 576, 704, 704, 704),
        "en_down": (0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0),
        "en_skip": (0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1),
        "en_att_heads": (8, 8, 8, 0, 8, 8, 0, 8, 8, 0, 8, 8),
        "de_out_c": (704, 576, 576, 576, 320, 320, 320, 256, 256, 256, 256),
        "de_up": ("none", "subpix", "none", "none", "subpix", "none", "none", "subpix", "none", "none", "none"),
        "de_skip": (1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0),
        "de_att_heads": (8, 8, 0, 8, 8, 0, 8, 8, 0, 8, 8),  # no self-attention where there is a skip connection
        "t_out_c": 256,
        "vae_c": 4,
        "block_deep": 3,
    }
    device = "cuda"
    total_step = 1000

    unet = UNet(config["en_out_c"], config["en_down"], config["en_skip"], config["en_att_heads"],
                config["de_out_c"], config["de_up"], config["de_skip"], config["de_att_heads"],
                config["t_out_c"], config["vae_c"], config["block_deep"]).to(device)

    print("total params", sum(i.numel() for i in unet.parameters()) / 10000, "(unit: 10k)")
    print("encoder", sum(i.numel() for i in unet.encoder.parameters()) / 10000, "(unit: 10k)")
    print("decoder", sum(i.numel() for i in unet.decoder.parameters()) / 10000, "(unit: 10k)")
    print("t", sum(i.numel() for i in unet.t_encoder.parameters()) / 10000, "(unit: 10k)")

    batch_size = 2
    x = np.random.random((batch_size, config["vae_c"], 32, 32))
    t = np.random.uniform(1, total_step + 0.9999, size=(batch_size, 1))
    t = np.array(t, dtype=np.int16)
    t = t / total_step

    with torch.no_grad():
        x = torch.Tensor(x).to(device)
        t = torch.Tensor(t).to(device)
        y = unet(x, t)
        print(y.shape)

    z = y[0].cpu().numpy()
    z = np.clip(np.asarray((z + 1) * 127.5), 0, 255)
    z = np.asarray(z, dtype=np.uint8)

    z = [np.tile(z[ii, :, :, np.newaxis], (1, 1, 3)) for ii in range(z.shape[0])]
    noise = merge_images(np.array(z))

    noise = cv2.resize(noise, None, fx=2, fy=2)
    cv2.imshow("noise", noise)
    cv2.waitKey(0)

    # Export to ONNX
    torch.onnx.export(
        unet,
        (x, t),
        'unet.onnx',
        export_params=True,
        opset_version=12,
    )
    import onnx

    # Add shape information to the exported graph
    model_file = 'unet.onnx'
    onnx_model = onnx.load(model_file)
    onnx.save(onnx.shape_inference.infer_shapes(onnx_model), model_file)
requirements.txt ADDED
@@ -0,0 +1,186 @@
absl-py==1.3.0
addict==2.4.0
aiofiles==23.1.0
aiohttp==3.8.3
aiosignal==1.3.1
aliyun-python-sdk-core==2.13.36
aliyun-python-sdk-kms==2.16.0
altair==4.2.0
anyio==3.6.2
appdirs==1.4.4
asttokens==2.3.0
async-timeout==4.0.2
attrs==22.1.0
audioread==3.0.0
backcall==0.2.0
certifi==2022.12.7
cffi==1.15.1
charset-normalizer==2.1.1
chumpy==0.70
click==8.1.3
clip==1.0
colorama==0.4.6
commonmark==0.9.1
contourpy==1.0.6
cpm-kernels==1.0.11
crcmod==1.7
cryptography==39.0.2
cycler==0.11.0
Cython==0.29.32
datasets==2.8.0
decorator==5.1.1
decord==0.6.0
diffusers==0.20.1
dill==0.3.6
docker-pycreds==0.4.0
einops==0.6.0
entrypoints==0.4
exceptiongroup==1.1.3
executing==1.2.0
fastapi==0.88.0
ffmpy==0.3.0
filelock==3.8.2
Flask==2.0.2
Flask-Cors==3.0.10
fonttools==4.38.0
frozenlist==1.3.3
fsspec==2022.11.0
ftfy==6.1.1
gast==0.5.3
gitdb==4.0.10
GitPython==3.1.32
gradio==3.39.0
gradio_client==0.3.0
h11==0.14.0
httpcore==0.16.2
httpx==0.23.1
huggingface-hub==0.16.4
icetk==0.0.4
idna==3.4
importlib-metadata==5.2.0
ipython==8.15.0
itsdangerous==2.1.2
jedi==0.19.0
Jinja2==3.1.2
jmespath==0.10.0
joblib==1.2.0
json-tricks==3.16.1
jsonplus==0.8.0
jsonschema==4.17.3
kiwisolver==1.4.4
lazy_loader==0.1
librosa==0.10.0
linkify-it-py==1.0.3
lion-pytorch==0.1.2
llvmlite==0.39.1
loguru==0.6.0
Markdown==3.4.1
markdown-it-py==2.1.0
MarkupSafe==2.1.1
matplotlib==3.6.2
matplotlib-inline==0.1.6
mdit-py-plugins==0.3.3
mdurl==0.1.2
mediapipe==0.8.11
mmcv-full==1.7.0
mmdet==2.26.0
model-index==0.1.11
modelscope==1.3.2
mpmath==1.2.1
msgpack==1.0.4
multidict==6.0.3
multiprocess==0.70.14
munkres==1.1.4
networkx==3.0
numba==0.56.4
numpy==1.23.4
onnx==1.14.1
opencv-contrib-python==4.5.5.64
opencv-python==4.5.5.64
openmim==0.3.3
ordered-set==4.1.0
orjson==3.8.3
oss2==2.16.0
packaging==21.3
pandas==1.5.2
parso==0.8.3
pathtools==0.1.2
pickleshare==0.7.5
Pillow==9.2.0
pip==23.1.2
platformdirs==3.1.0
plotly==5.11.0
pooch==1.7.0
prodigyopt==1.0
prompt-toolkit==3.0.39
protobuf==4.24.2
psutil==5.9.5
pure-eval==0.2.2
pyarrow==11.0.0
pycocotools==2.0.6
pycparser==2.21
pycryptodome==3.16.0
pydantic==1.10.2
pydub==0.25.1
Pygments==2.13.0
pyparsing==3.0.9
pyrsistent==0.19.2
python-dateutil==2.8.2
python-multipart==0.0.5
pytorch-fid==0.3.0
pytz==2022.6
PyYAML==6.0
regex==2022.10.31
requests==2.28.1
responses==0.18.0
rfc3986==1.5.0
rich==12.6.0
safetensors==0.3.3
scikit-learn==1.2.1
scipy==1.9.3
semantic-version==2.10.0
sentencepiece==0.1.97
sentry-sdk==1.28.0
setproctitle==1.3.2
setuptools==65.5.0
simplejson==3.18.3
six==1.16.0
smmap==5.0.0
sniffio==1.3.0
sortedcontainers==2.4.0
soundfile==0.12.1
soxr==0.3.4
stack-data==0.6.2
starlette==0.22.0
sympy==1.11.1
tabulate==0.9.0
tenacity==8.1.0
terminaltables==3.1.10
threadpoolctl==3.1.0
timm==0.4.9
tokenizers==0.13.2
toolz==0.12.0
torch==2.0.0+cu117
torchaudio==2.0.1+cu117
torchinfo==1.7.1
torchvision==0.15.1+cu117
tqdm==4.64.1
traitlets==5.9.0
transformers==4.26.1
typing_extensions==4.4.0
uc-micro-py==1.0.1
unicodedata2==15.0.0
urllib3==1.26.12
uvicorn==0.20.0
wandb==0.15.5
wcwidth==0.2.5
websockets==10.4
Werkzeug==2.2.2
wheel==0.37.1
win32-setctime==1.1.0
wincertstore==0.2
xtcocotools==1.12
xxhash==3.2.0
yapf==0.32.0
yarl==1.8.2
zipp==3.11.0
vae/pretrain_vae/models--gsdf--Counterfeit-V2.5/refs/main ADDED
@@ -0,0 +1 @@
93c5412baf37cbfa23a3278f7b33b0328db581fb
vae/pretrain_vae/models--gsdf--Counterfeit-V2.5/snapshots/93c5412baf37cbfa23a3278f7b33b0328db581fb/vae/config.json ADDED
@@ -0,0 +1,29 @@
{
  "_class_name": "AutoencoderKL",
  "_diffusers_version": "0.10.2",
  "act_fn": "silu",
  "block_out_channels": [
    128,
    256,
    512,
    512
  ],
  "down_block_types": [
    "DownEncoderBlock2D",
    "DownEncoderBlock2D",
    "DownEncoderBlock2D",
    "DownEncoderBlock2D"
  ],
  "in_channels": 3,
  "latent_channels": 4,
  "layers_per_block": 2,
  "norm_num_groups": 32,
  "out_channels": 3,
  "sample_size": 256,
  "up_block_types": [
    "UpDecoderBlock2D",
    "UpDecoderBlock2D",
    "UpDecoderBlock2D",
    "UpDecoderBlock2D"
  ]
}
vae/pretrain_vae/models--gsdf--Counterfeit-V2.5/snapshots/93c5412baf37cbfa23a3278f7b33b0328db581fb/vae/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:af03509f25bf282de98626830ef4fa607e596d0d0fbda8f1d6f5ccaa1d334640
size 334643276
weight/unet_ema.pth ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:598d60a65f5463df4c3c33879c887c5029b41a60b52c4d1481f99e47548b8ff2
size 857352782