#

去噪扩散概率模型 (DDPM) 采样

有关更简单的 DDPM 实现，请参阅我们的 DDPM 实现。对于 $\alpha_t$、$\beta_t$ 调度（schedule）等，我们使用相同的符号。

16from typing import Optional, List
17
18import numpy as np
19import torch
20
21from labml import monit
22from labml_nn.diffusion.stable_diffusion.latent_diffusion import LatentDiffusion
23from labml_nn.diffusion.stable_diffusion.sampler import DiffusionSampler

#

DDPM 采样器

这扩展了DiffusionSampler 基类。

DDPM 通过从 $p_\theta(x_{t-1} \mid x_t)$ 中采样，逐步反复去除噪声来生成图像。

$$
\begin{aligned}
p_\theta(x_{t-1} \mid x_t) &= \mathcal{N}\big(x_{t-1}; \mu_\theta(x_t, t), \tilde\beta_t \mathbf{I}\big) \\
\mu_t(x_t, t) &= \frac{\sqrt{\bar\alpha_{t-1}}\,\beta_t}{1 - \bar\alpha_t} x_0 + \frac{\sqrt{\alpha_t}\,(1 - \bar\alpha_{t-1})}{1 - \bar\alpha_t} x_t \\
\tilde\beta_t &= \frac{1 - \bar\alpha_{t-1}}{1 - \bar\alpha_t} \beta_t \\
x_0 &= \frac{1}{\sqrt{\bar\alpha_t}} x_t - \Big(\sqrt{\frac{1}{\bar\alpha_t} - 1}\Big) \epsilon_\theta
\end{aligned}
$$

26class DDPMSampler(DiffusionSampler):

#

49    model: LatentDiffusion

#

`model` 是预测噪声 $\epsilon_\text{cond}(x_t, c)$ 的模型

51    def __init__(self, model: LatentDiffusion):

#

55        super().__init__(model)

#

采样步骤 $1, 2, \dots, T$

58        self.time_steps = np.asarray(list(range(self.n_steps)))
59
60        with torch.no_grad():

#

$\bar\alpha_t$

62            alpha_bar = self.model.alpha_bar

#

$\beta_t$ 调度（schedule）

64            beta = self.model.beta

#

$\bar\alpha_{t-1}$

66            alpha_bar_prev = torch.cat([alpha_bar.new_tensor([1.]), alpha_bar[:-1]])

#

$\sqrt{\bar\alpha}$

69            self.sqrt_alpha_bar = alpha_bar ** .5

#

$\sqrt{1 - \bar\alpha}$

71            self.sqrt_1m_alpha_bar = (1. - alpha_bar) ** .5

#

$\frac{1}{\sqrt{\bar\alpha_t}}$

73            self.sqrt_recip_alpha_bar = alpha_bar ** -.5

#

$\sqrt{\frac{1}{\bar\alpha_t} - 1}$

75            self.sqrt_recip_m1_alpha_bar = (1 / alpha_bar - 1) ** .5

#

$\frac{1 - \bar\alpha_{t-1}}{1 - \bar\alpha_t} \beta_t$

78            variance = beta * (1. - alpha_bar_prev) / (1. - alpha_bar)

#

截断（clamp）后的对数方差 $\log \tilde\beta_t$

80            self.log_var = torch.log(torch.clamp(variance, min=1e-20))

#

$\frac{\sqrt{\bar\alpha_{t-1}}\,\beta_t}{1 - \bar\alpha_t}$

82            self.mean_x0_coef = beta * (alpha_bar_prev ** .5) / (1. - alpha_bar)

#

$\frac{\sqrt{\alpha_t}\,(1 - \bar\alpha_{t-1})}{1 - \bar\alpha_t}$

84            self.mean_xt_coef = (1. - alpha_bar_prev) * ((1 - beta) ** 0.5) / (1. - alpha_bar)

#

采样循环

`shape` 是生成图像的形状，格式为 `[batch_size, channels, height, width]`
`cond` 是条件嵌入 $c$
`temperature` 是噪声温度（随机噪声乘以此值）
`x_last` 是 $x_T$。如果未提供，将使用随机噪声。
`uncond_scale` 是无条件引导系数 $s$。用于 $\epsilon_\theta(x_t, c) = s\,\epsilon_\text{cond}(x_t, c) + (1 - s)\,\epsilon_\text{cond}(x_t, c_u)$
`uncond_cond` 是空提示的条件嵌入 $c_u$
`skip_steps` 是要跳过的时间步数 $t'$。我们从 $T - t'$ 开始采样，此时 `x_last` 即为 $x_{T - t'}$。

86    @torch.no_grad()
87    def sample(self,
88               shape: List[int],
89               cond: torch.Tensor,
90               repeat_noise: bool = False,
91               temperature: float = 1.,
92               x_last: Optional[torch.Tensor] = None,
93               uncond_scale: float = 1.,
94               uncond_cond: Optional[torch.Tensor] = None,
95               skip_steps: int = 0,
96               ):

#

获取设备和批次大小

113        device = self.model.device
114        bs = shape[0]

#

获取 $x_{T}$

117        x = x_last if x_last is not None else torch.randn(shape, device=device)

#

要采样的时间步 $T - t', T - t' - 1, \dots, 1$

120        time_steps = np.flip(self.time_steps)[skip_steps:]

#

采样循环

123        for step in monit.iterate('Sample', time_steps):

#

时间步 $t$

125            ts = x.new_full((bs,), step, dtype=torch.long)

#

采样 $x_{t-1}$

128            x, pred_x0, e_t = self.p_sample(x, cond, ts, step,
129                                            repeat_noise=repeat_noise,
130                                            temperature=temperature,
131                                            uncond_scale=uncond_scale,
132                                            uncond_cond=uncond_cond)

#

返回 $x_{0}$

135        return x

#

从 $p_\theta(x_{t-1} \mid x_t)$ 中采样 $x_{t-1}$

`x` 是 $x_t$，形状为 `[batch_size, channels, height, width]`
`c` 是条件嵌入 $c$，形状为 `[batch_size, emb_size]`
`t` 是 $t$，形状为 `[batch_size]`
`step` 是整数形式的时间步 $t$
`repeat_noise` 指定批次中所有样本的噪声是否应相同
`temperature` 是噪声温度（随机噪声乘以此值）
`uncond_scale` 是无条件引导系数 $s$。用于 $\epsilon_\theta(x_t, c) = s\,\epsilon_\text{cond}(x_t, c) + (1 - s)\,\epsilon_\text{cond}(x_t, c_u)$
`uncond_cond` 是空提示的条件嵌入 $c_u$

137    @torch.no_grad()
138    def p_sample(self, x: torch.Tensor, c: torch.Tensor, t: torch.Tensor, step: int,
139                 repeat_noise: bool = False,
140                 temperature: float = 1.,
141                 uncond_scale: float = 1., uncond_cond: Optional[torch.Tensor] = None):

#

获取 $ϵ_{θ}$

157        e_t = self.get_eps(x, t, c,
158                           uncond_scale=uncond_scale,
159                           uncond_cond=uncond_cond)

#

获取批次大小

162        bs = x.shape[0]

#

$\frac{1}{\sqrt{\bar\alpha_t}}$

165        sqrt_recip_alpha_bar = x.new_full((bs, 1, 1, 1), self.sqrt_recip_alpha_bar[step])

#

$\sqrt{\frac{1}{\bar\alpha_t} - 1}$

167        sqrt_recip_m1_alpha_bar = x.new_full((bs, 1, 1, 1), self.sqrt_recip_m1_alpha_bar[step])

#

用当前的 $\epsilon_\theta$ 计算 $x_0$

$x_0 = \frac{1}{\sqrt{\bar\alpha_t}} x_t - \Big(\sqrt{\frac{1}{\bar\alpha_t} - 1}\Big) \epsilon_\theta$

172        x0 = sqrt_recip_alpha_bar * x - sqrt_recip_m1_alpha_bar * e_t

#

$\frac{\sqrt{\bar\alpha_{t-1}}\,\beta_t}{1 - \bar\alpha_t}$

175        mean_x0_coef = x.new_full((bs, 1, 1, 1), self.mean_x0_coef[step])

#

$\frac{\sqrt{\alpha_t}\,(1 - \bar\alpha_{t-1})}{1 - \bar\alpha_t}$

177        mean_xt_coef = x.new_full((bs, 1, 1, 1), self.mean_xt_coef[step])

#

计算 $\mu_t(x_t, t)$

$\mu_t(x_t, t) = \frac{\sqrt{\bar\alpha_{t-1}}\,\beta_t}{1 - \bar\alpha_t} x_0 + \frac{\sqrt{\alpha_t}\,(1 - \bar\alpha_{t-1})}{1 - \bar\alpha_t} x_t$

183        mean = mean_x0_coef * x0 + mean_xt_coef * x

#

$\log \tilde\beta_t$

185        log_var = x.new_full((bs, 1, 1, 1), self.log_var[step])

#

当 $t = 1$（采样过程的最后一步）时不添加噪声。注意当 $t = 1$ 时 `step` 为 `0`。

189        if step == 0:
190            noise = 0

#

如果批次中的所有样本都使用相同的噪声

192        elif repeat_noise:
193            noise = torch.randn((1, *x.shape[1:]))

#

每个样本的噪声不同

195        else:
196            noise = torch.randn(x.shape)

#

将噪声乘以温度

199        noise = noise * temperature

#

从以下分布中采样

$p_\theta(x_{t-1} \mid x_t) = \mathcal{N}\big(x_{t-1}; \mu_\theta(x_t, t), \tilde\beta_t \mathbf{I}\big)$

204        x_prev = mean + (0.5 * log_var).exp() * noise

#

207        return x_prev, x0, e_t

#

从 $q(x_t \mid x_0)$ 中采样

$q(x_t \mid x_0) = \mathcal{N}\big(x_t; \sqrt{\bar\alpha_t}\, x_0, (1 - \bar\alpha_t) \mathbf{I}\big)$

`x0` 是 $x_0$，形状为 `[batch_size, channels, height, width]`
`index` 是时间步 $t$ 的索引
`noise` 是噪声 $\epsilon$

209    @torch.no_grad()
210    def q_sample(self, x0: torch.Tensor, index: int, noise: Optional[torch.Tensor] = None):

#

如果未指定噪声，则为随机噪声

222        if noise is None:
223            noise = torch.randn_like(x0)

#

从 $\mathcal{N}\big(x_t; \sqrt{\bar\alpha_t}\, x_0, (1 - \bar\alpha_t) \mathbf{I}\big)$ 中采样

226        return self.sqrt_alpha_bar[index] * x0 + self.sqrt_1m_alpha_bar[index] * noise

去噪扩散概率模型 (DDPM) 采样

DDPM 采样器

采样循环

从 $p_\theta(x_{t-1} \mid x_t)$ 中采样 $x_{t-1}$

从 $q(x_t \mid x_0)$ 中采样

从 $p_\theta(x_{t-1} \mid x_t)$ 中采样 $x_{t-1}$

从 $q(x_t \mid x_0)$ 中采样