11import argparse
12import os
13from pathlib import Path
14
15import torch
16
17from labml import lab, monit
18from labml_nn.diffusion.stable_diffusion.latent_diffusion import LatentDiffusion
19from labml_nn.diffusion.stable_diffusion.sampler.ddim import DDIMSampler
20from labml_nn.diffusion.stable_diffusion.sampler.ddpm import DDPMSampler
21from labml_nn.diffusion.stable_diffusion.util import load_model, save_images, set_seed
24class Txt2Img:
28 model: LatentDiffusion
30 def __init__(self, *,
31 checkpoint_path: Path,
32 sampler_name: str,
33 n_steps: int = 50,
34 ddim_eta: float = 0.0,
35 ):
获取设备
45 self.device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
将模型移至设备
47 self.model.to(self.device)
50 if sampler_name == 'ddim':
51 self.sampler = DDIMSampler(self.model,
52 n_steps=n_steps,
53 ddim_eta=ddim_eta)
54 elif sampler_name == 'ddpm':
55 self.sampler = DDPMSampler(self.model)
dest_path
是存储生成的图像的路径batch_size
是批量生成的图像数量prompt
是使用以下命令生成图像的提示h
是图像的高度w
是图像的宽度uncond_scale
是无条件指导量表。这用于57 @torch.no_grad()
58 def __call__(self, *,
59 dest_path: str,
60 batch_size: int = 3,
61 prompt: str,
62 h: int = 512, w: int = 512,
63 uncond_scale: float = 7.5,
64 ):
图像中的通道数
75 c = 4
降低图像到潜在空间的分辨率
77 f = 8
做一批提示
80 prompts = batch_size * [prompt]
AMP 自动投射
83 with torch.cuda.amp.autocast():
在无条件缩放中,无法获取空提示的嵌入值(无条件)。
85 if uncond_scale != 1.0:
86 un_cond = self.model.get_text_conditioning(batch_size * [""])
87 else:
88 un_cond = None
获取提示嵌入信息
90 cond = self.model.get_text_conditioning(prompts)
93 x = self.sampler.sample(cond=cond,
94 shape=[batch_size, c, h // f, w // f],
95 uncond_scale=uncond_scale,
96 uncond_cond=un_cond)
保存图片
101 save_images(images, dest_path, 'txt_')
104def main():
108 parser = argparse.ArgumentParser()
109
110 parser.add_argument(
111 "--prompt",
112 type=str,
113 nargs="?",
114 default="a painting of a virus monster playing guitar",
115 help="the prompt to render"
116 )
117
118 parser.add_argument("--batch_size", type=int, default=4, help="batch size")
119
120 parser.add_argument(
121 '--sampler',
122 dest='sampler_name',
123 choices=['ddim', 'ddpm'],
124 default='ddim',
125 help=f'Set the sampler.',
126 )
127
128 parser.add_argument("--flash", action='store_true', help="whether to use flash attention")
129
130 parser.add_argument("--steps", type=int, default=50, help="number of sampling steps")
131
132 parser.add_argument("--scale", type=float, default=7.5,
133 help="unconditional guidance scale: "
134 "eps = eps(x, empty) + scale * (eps(x, cond) - eps(x, empty))")
135
136 opt = parser.parse_args()
137
138 set_seed(42)
设置闪光灯注意力
141 from labml_nn.diffusion.stable_diffusion.model.unet_attention import CrossAttention
142 CrossAttention.use_flash_attention = opt.flash
145 txt2img = Txt2Img(checkpoint_path=lab.get_data_path() / 'stable-diffusion' / 'sd-v1-4.ckpt',
146 sampler_name=opt.sampler_name,
147 n_steps=opt.steps)
148
149 with monit.section('Generate'):
150 txt2img(dest_path='outputs',
151 batch_size=opt.batch_size,
152 prompt=opt.prompt,
153 uncond_scale=opt.scale)
157if __name__ == "__main__":
158 main()