#

降噪扩散隐式模型 (DDIM) 采样

本文实现了论文《降噪扩散隐式模型》(Denoising Diffusion Implicit Models) 中的 DDIM 采样。

16from typing import Optional, List
17
18import numpy as np
19import torch
20
21from labml import monit
22from labml_nn.diffusion.stable_diffusion.latent_diffusion import LatentDiffusion
23from labml_nn.diffusion.stable_diffusion.sampler import DiffusionSampler

#

DDIM 采样器

它继承自 `DiffusionSampler` 基类。

DDIM 通过逐步采样、反复去除噪声来生成图像，每一步使用：

$$x_{τ_{i-1}} = \sqrt{α_{τ_{i-1}}} \left( \frac{x_{τ_i} - \sqrt{1 - α_{τ_i}} \, ϵ_θ(x_{τ_i})}{\sqrt{α_{τ_i}}} \right) + \sqrt{1 - α_{τ_{i-1}} - σ_{τ_i}^2} \cdot ϵ_θ(x_{τ_i}) + σ_{τ_i} ϵ_{τ_i}$$

其中 $ϵ_{τ_i}$ 是随机噪声， $τ$ 是 $[1, 2, \dots, T]$ 的一个长度为 $S$ 的子序列，并且 $σ_{τ_i} = η \sqrt{\frac{1 - α_{τ_{i-1}}}{1 - α_{τ_i}}} \sqrt{1 - \frac{α_{τ_i}}{α_{τ_{i-1}}}}$

请注意，DDIM 论文中的 $α_t$ 对应于 DDPM 论文中的 $\bar{α}_t$ 。

26class DDIMSampler(DiffusionSampler):

#

52    model: LatentDiffusion

#

model 是预测噪声 $ϵ_{\text{cond}}(x_t, c)$ 的模型
n_steps 是 DDIM 采样步骤的数量 $S$
ddim_discretize 指定如何从 $[1, 2, \dots, T]$ 中选取 $τ$ ，可以是 `uniform` 或 `quad` 。
ddim_eta 是用于计算 $σ_{τ_i}$ 的 $η$ 。 $η = 0$ 使采样过程具有确定性。

54    def __init__(self, model: LatentDiffusion, n_steps: int, ddim_discretize: str = "uniform", ddim_eta: float = 0.):

#

63        super().__init__(model)

#

步数， $T$

65        self.n_steps = model.n_steps

#

计算在 $[1, 2, \dots, T]$ 上均匀分布的 $τ$

68        if ddim_discretize == 'uniform':
69            c = self.n_steps // n_steps
70            self.time_steps = np.asarray(list(range(0, self.n_steps, c))) + 1

#

计算在 $[1, 2, \dots, T]$ 上按二次方分布的 $τ$

72        elif ddim_discretize == 'quad':
73            self.time_steps = ((np.linspace(0, np.sqrt(self.n_steps * .8), n_steps)) ** 2).astype(int) + 1
74        else:
75            raise NotImplementedError(ddim_discretize)
76
77        with torch.no_grad():

#

获取 $\bar{α}_t$

79            alpha_bar = self.model.alpha_bar

#

$α_{τ_{i}}$

82            self.ddim_alpha = alpha_bar[self.time_steps].clone().to(torch.float32)

#

$\sqrt{α_{τ_i}}$

84            self.ddim_alpha_sqrt = torch.sqrt(self.ddim_alpha)

#

$α_{τ_{i - 1}}$

86            self.ddim_alpha_prev = torch.cat([alpha_bar[0:1], alpha_bar[self.time_steps[:-1]]])

#

$σ_{τ_i} = η \sqrt{\frac{1 - α_{τ_{i-1}}}{1 - α_{τ_i}}} \sqrt{1 - \frac{α_{τ_i}}{α_{τ_{i-1}}}}$

91            self.ddim_sigma = (ddim_eta *
92                               ((1 - self.ddim_alpha_prev) / (1 - self.ddim_alpha) *
93                                (1 - self.ddim_alpha / self.ddim_alpha_prev)) ** .5)

#

$\sqrt{1 - α_{τ_i}}$

96            self.ddim_sqrt_one_minus_alpha = (1. - self.ddim_alpha) ** .5

#

采样循环

shape 是生成图像的形状，格式为 `[batch_size, channels, height, width]`
cond 是条件嵌入 $c$
repeat_noise 指定批次中所有样本的噪声是否应相同
temperature 是噪声温度（随机噪声乘以此值）
x_last 是 $x_{τ_S}$ 。如果未提供，将使用随机噪声。
uncond_scale 是无条件引导系数 $s$ ，用于 $ϵ_θ(x_t, c) = s ϵ_{\text{cond}}(x_t, c) + (1 - s) ϵ_{\text{cond}}(x_t, c_u)$
uncond_cond 是空提示的条件嵌入 $c_u$
skip_steps 是要跳过的时间步数 $i'$ 。我们从 $τ_{S - i'}$ 开始采样，此时 x_last 即为 $x_{τ_{S - i'}}$ 。

98    @torch.no_grad()
99    def sample(self,
100               shape: List[int],
101               cond: torch.Tensor,
102               repeat_noise: bool = False,
103               temperature: float = 1.,
104               x_last: Optional[torch.Tensor] = None,
105               uncond_scale: float = 1.,
106               uncond_cond: Optional[torch.Tensor] = None,
107               skip_steps: int = 0,
108               ):

#

获取设备和批次大小

125        device = self.model.device
126        bs = shape[0]

#

获取 $x_{τ_{S}}$

129        x = x_last if x_last is not None else torch.randn(shape, device=device)

#

采样的时间步长 $τ_{S - i^{'}}, τ_{S - i^{'} - 1}, \dots, τ_{1}$

132        time_steps = np.flip(self.time_steps)[skip_steps:]
133
134        for i, step in monit.enum('Sample', time_steps):

#

在列表 $[τ_1, τ_2, \dots, τ_S]$ 中的索引 $i$

136            index = len(time_steps) - i - 1

#

时间步长 $τ_{i}$

138            ts = x.new_full((bs,), step, dtype=torch.long)

#

采样 $x_{τ_{i-1}}$

141            x, pred_x0, e_t = self.p_sample(x, cond, ts, step, index=index,
142                                            repeat_noise=repeat_noise,
143                                            temperature=temperature,
144                                            uncond_scale=uncond_scale,
145                                            uncond_cond=uncond_cond)

#

返回 $x_{0}$

148        return x

#

采样 $x_{τ_{i-1}}$

x 是 $x_{τ_i}$ ，形状为 `[batch_size, channels, height, width]`
c 是条件嵌入 $c$ ，形状为 `[batch_size, emb_size]`
t 是 $τ_i$ ，形状为 `[batch_size]`
step 是整数形式的时间步 $τ_i$
index 是在列表 $[τ_1, τ_2, \dots, τ_S]$ 中的索引 $i$
repeat_noise 指定批次中所有样本的噪声是否应相同
temperature 是噪声温度（随机噪声乘以此值）
uncond_scale 是无条件引导系数 $s$ ，用于 $ϵ_θ(x_t, c) = s ϵ_{\text{cond}}(x_t, c) + (1 - s) ϵ_{\text{cond}}(x_t, c_u)$
uncond_cond 是空提示的条件嵌入 $c_u$

150    @torch.no_grad()
151    def p_sample(self, x: torch.Tensor, c: torch.Tensor, t: torch.Tensor, step: int, index: int, *,
152                 repeat_noise: bool = False,
153                 temperature: float = 1.,
154                 uncond_scale: float = 1.,
155                 uncond_cond: Optional[torch.Tensor] = None):

#

获取 $ϵ_{θ} (x_{τ_{i}})$

172        e_t = self.get_eps(x, t, c,
173                           uncond_scale=uncond_scale,
174                           uncond_cond=uncond_cond)

#

计算 $x_{τ_{i - 1}}$ 和预测 $x_{0}$

177        x_prev, pred_x0 = self.get_x_prev_and_pred_x0(e_t, index, x,
178                                                      temperature=temperature,
179                                                      repeat_noise=repeat_noise)

#

182        return x_prev, pred_x0, e_t

#

给定 $ϵ_θ(x_{τ_i})$ ，采样 $x_{τ_{i-1}}$

184    def get_x_prev_and_pred_x0(self, e_t: torch.Tensor, index: int, x: torch.Tensor, *,
185                               temperature: float,
186                               repeat_noise: bool):

#

$α_{τ_{i}}$

192        alpha = self.ddim_alpha[index]

#

$α_{τ_{i - 1}}$

194        alpha_prev = self.ddim_alpha_prev[index]

#

$σ_{τ_{i}}$

196        sigma = self.ddim_sigma[index]

#

$\sqrt{1 - α_{τ_i}}$

198        sqrt_one_minus_alpha = self.ddim_sqrt_one_minus_alpha[index]

#

当前对 $x_0$ 的预测： $\frac{x_{τ_i} - \sqrt{1 - α_{τ_i}} \, ϵ_θ(x_{τ_i})}{\sqrt{α_{τ_i}}}$

202        pred_x0 = (x - sqrt_one_minus_alpha * e_t) / (alpha ** 0.5)

#

指向 $x_t$ 的方向： $\sqrt{1 - α_{τ_{i-1}} - σ_{τ_i}^2} \cdot ϵ_θ(x_{τ_i})$

205        dir_xt = (1. - alpha_prev - sigma ** 2).sqrt() * e_t

#

当 $η = 0$ 时不添加任何噪声

208        if sigma == 0.:
209            noise = 0.

#

如果批次中的所有样品都使用相同的噪声

211        elif repeat_noise:
212            noise = torch.randn((1, *x.shape[1:]), device=x.device)

#

每个样本的噪声不同

214        else:
215            noise = torch.randn(x.shape, device=x.device)

#

将噪声乘以温度

218        noise = noise * temperature

#

$$x_{τ_{i-1}} = \sqrt{α_{τ_{i-1}}} \left( \frac{x_{τ_i} - \sqrt{1 - α_{τ_i}} \, ϵ_θ(x_{τ_i})}{\sqrt{α_{τ_i}}} \right) + \sqrt{1 - α_{τ_{i-1}} - σ_{τ_i}^2} \cdot ϵ_θ(x_{τ_i}) + σ_{τ_i} ϵ_{τ_i}$$

227        x_prev = (alpha_prev ** 0.5) * pred_x0 + dir_xt + sigma * noise

#

230        return x_prev, pred_x0

#

从 $q_{σ, τ}(x_{τ_i} \mid x_0)$ 中采样

$q_{σ, τ}(x_{τ_i} \mid x_0) = \mathcal{N}\left(x_{τ_i}; \sqrt{α_{τ_i}} \, x_0, (1 - α_{τ_i}) I\right)$

x0 是 $x_0$ ，形状为 `[batch_size, channels, height, width]`
index 是时间步 $τ_i$ 的索引 $i$
noise 是噪声 $ϵ$

232    @torch.no_grad()
233    def q_sample(self, x0: torch.Tensor, index: int, noise: Optional[torch.Tensor] = None):

#

如果未指定噪声，则为随机噪声

246        if noise is None:
247            noise = torch.randn_like(x0)

#

从 $q_{σ, τ}(x_{τ_i} \mid x_0) = \mathcal{N}\left(x_{τ_i}; \sqrt{α_{τ_i}} \, x_0, (1 - α_{τ_i}) I\right)$ 中采样

252        return self.ddim_alpha_sqrt[index] * x0 + self.ddim_sqrt_one_minus_alpha[index] * noise

#

绘制循环

x 是 $x_{S'}$ ，形状为 `[batch_size, channels, height, width]`
cond 是条件嵌入 $c$
t_start 是开始采样的步骤 $S'$
orig 是我们正在修复绘制的原始图像在潜空间中的表示。如果未提供，则执行图像到图像的转换。
mask 是用于保留原始图像的掩码。
orig_noise 是要添加到原始图像的固定噪声。
uncond_scale 是无条件引导系数 $s$ ，用于 $ϵ_θ(x_t, c) = s ϵ_{\text{cond}}(x_t, c) + (1 - s) ϵ_{\text{cond}}(x_t, c_u)$
uncond_cond 是空提示的条件嵌入 $c_u$

254    @torch.no_grad()
255    def paint(self, x: torch.Tensor, cond: torch.Tensor, t_start: int, *,
256              orig: Optional[torch.Tensor] = None,
257              mask: Optional[torch.Tensor] = None, orig_noise: Optional[torch.Tensor] = None,
258              uncond_scale: float = 1.,
259              uncond_cond: Optional[torch.Tensor] = None,
260              ):

#

获取批次大小

276        bs = x.shape[0]

#

要采样的时间步 $τ_{S'}, τ_{S' - 1}, \dots, τ_1$

279        time_steps = np.flip(self.time_steps[:t_start])
280
281        for i, step in monit.enum('Paint', time_steps):

#

在列表 $[τ_1, τ_2, \dots, τ_S]$ 中的索引 $i$

283            index = len(time_steps) - i - 1

#

时间步长 $τ_{i}$

285            ts = x.new_full((bs,), step, dtype=torch.long)

#

采样 $x_{τ_{i-1}}$

288            x, _, _ = self.p_sample(x, cond, ts, step, index=index,
289                                    uncond_scale=uncond_scale,
290                                    uncond_cond=uncond_cond)

#

将蒙版区域替换为原始图像

293            if orig is not None:

#

从 $q_{σ, τ}(x_{τ_i} \mid x_0)$ 中采样，得到潜空间中加噪后的原始图像

295                orig_t = self.q_sample(orig, index, noise=orig_noise)

#

替换被屏蔽的区域

297                x = orig_t * mask + x * (1 - mask)

#

300        return x

降噪扩散隐式模型 (DDIM) 采样

DDIM 采样器

采样循环

采样 $x_{τ_{i-1}}$

给定 $ϵ_θ(x_{τ_i})$ ，采样 $x_{τ_{i-1}}$

从 $q_{σ, τ}(x_{τ_i} \mid x_0)$ 中采样

绘画循环

采样 $x_{τ_{i-1}}$

给定 $ϵ_θ(x_{τ_i})$ ，采样 $x_{τ_{i-1}}$

从 $q_{σ, τ}(x_{τ_i} \mid x_0)$ 中采样