from labml import experiment
from labml.configs import option
from labml_nn.transformers import TransformerConfigs
from labml_nn.transformers.basic.autoregressive_experiment import Configs
from labml_nn.transformers.configs import FeedForwardConfigs
from labml_nn.transformers.primer_ez import SquaredReLU


# Squared ReLU activation for the position-wise feed-forward layer
@option(FeedForwardConfigs.activation, 'SquaredReLU')
def _squared_relu():
    return SquaredReLU()
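

# For reference, Squared ReLU is simply $y = \max(0, x)^2$. Below is a minimal
# illustrative sketch of an equivalent module (the name is hypothetical, and the
# actual `SquaredReLU` imported above may differ in details):
import torch
from torch import nn


class _SquaredReLUSketch(nn.Module):
    def forward(self, x: torch.Tensor):
        # Standard ReLU followed by an element-wise square
        return nn.functional.relu(x) ** 2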


# Multi-DConv-Head Attention: multi-head attention with a depth-wise
# convolution applied to the query, key and value projections
@option(TransformerConfigs.encoder_attn, 'MultiDConvHeadAttention')
def _d_conv_mha(c: TransformerConfigs):
    from labml_nn.transformers.primer_ez import MultiDConvHeadAttention
    return MultiDConvHeadAttention(c.n_heads, c.d_model, dropout_prob=c.dropout)
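

# The convolution in question is a causal 3x1 depth-wise convolution along the
# sequence dimension. A minimal sketch of such a layer (hypothetical name,
# reusing the `torch` imports above; the actual implementation may differ):
class _SpatialDepthWiseConvSketch(nn.Module):
    def __init__(self, d_k: int, kernel_size: int = 3):
        super().__init__()
        self.kernel_size = kernel_size
        # `groups=d_k` gives one filter per channel (depth-wise); pad by
        # `kernel_size - 1` so the output can be cropped to stay causal
        # (assumes `kernel_size > 1`)
        self.conv = nn.Conv1d(d_k, d_k, kernel_size,
                              padding=kernel_size - 1, groups=d_k)

    def forward(self, x: torch.Tensor):
        # `x` has shape `[seq_len, batch_size, heads, d_k]`
        seq_len, batch_size, heads, d_k = x.shape
        # Merge batch and heads; move `d_k` to the channel dimension
        x = x.permute(1, 2, 3, 0).reshape(batch_size * heads, d_k, seq_len)
        x = self.conv(x)
        # Crop the rightmost outputs so position $i$ only sees positions $\le i$
        x = x[:, :, :-(self.kernel_size - 1)]
        # Restore the original shape
        return x.reshape(batch_size, heads, d_k, seq_len).permute(3, 0, 1, 2)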


# Variation with the convolution weights shared among the heads
@option(TransformerConfigs.encoder_attn, 'MultiDSharedConvHeadAttention')
def _d_shared_conv_mha(c: TransformerConfigs):
    from labml_nn.transformers.primer_ez.variations import MultiDSharedConvHeadAttention
    return MultiDSharedConvHeadAttention(c.n_heads, c.d_model, dropout_prob=c.dropout)


# Variation with separate convolution kernels for each head
@option(TransformerConfigs.encoder_attn, 'MultiDPHConvHeadAttention')
def _d_per_head_conv_mha(c: TransformerConfigs):
    from labml_nn.transformers.primer_ez.variations import MultiDPHConvHeadAttention
    return MultiDPHConvHeadAttention(c.n_heads, c.d_model, dropout_prob=c.dropout)
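

# Each `@option` above registers a named alternative for a configuration;
# `experiment.configs` in `main` picks one by its string name
# (e.g. `'transformer.encoder_attn': 'MultiDConvHeadAttention'`).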
def main():
    # Create the experiment
    experiment.create(name="primer_ez")
    # Create configs
    conf = Configs()
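    # (`Configs` extends the basic autoregressive transformer experiment, so the
    # dataset, training loop and sampling logic are already in place.)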
    # Override configurations
    experiment.configs(conf, {
        # Use character-level tokenizer
        'tokenizer': 'character',
        # Prompt separator is blank
        'prompt_separator': '',
        # Starting prompt for sampling
        'prompt': 'It is ',
        # Use Tiny Shakespeare dataset
        'text': 'tiny_shakespeare',

        # Use a context size of $256$
        'seq_len': 256,
        # Train for $128$ epochs
        'epochs': 128,
        # Batch size $32$
        'batch_size': 32,
        # Switch between training and validation $10$ times per epoch
        'inner_iterations': 10,

        # Model size
        'd_model': 512,
        'transformer.ffn.d_ff': 2048,

        # Use Adam optimizer
        'optimizer.optimizer': 'Adam',
        'optimizer.learning_rate': 2.5e-4,

        # Use Squared ReLU in the feed-forward layer (registered above)
        'transformer.ffn.activation': 'SquaredReLU',
        # Use Multi-DConv-Head Attention in the encoder (registered above)
        'transformer.encoder_attn': 'MultiDConvHeadAttention',
    })
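
    # To try the variations, replace `'MultiDConvHeadAttention'` above with
    # `'MultiDSharedConvHeadAttention'` or `'MultiDPHConvHeadAttention'`.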

    # Set models for saving and loading
    experiment.add_pytorch_models({'model': conf.model})

    # Start the experiment
    with experiment.start():
        # Run training
        conf.run()


if __name__ == '__main__':
    main()