from labml import experiment
from labml.configs import option
from labml_nn.transformers import TransformerConfigs
from labml_nn.transformers.basic.autoregressive_experiment import Configs
from labml_nn.transformers.configs import FeedForwardConfigs
from labml_nn.transformers.primer_ez import SquaredReLU


@option(FeedForwardConfigs.activation, 'SquaredReLU')
def _squared_relu():
    """
    ### Squared ReLU activation option
    """
    return SquaredReLU()
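# For reference, the imported `SquaredReLU` module is assumed to compute
# roughly the following (illustration only, not used by this experiment):
#
#     y = torch.relu(x) ** 2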


@option(TransformerConfigs.encoder_attn, 'MultiDConvHeadAttention')
def _d_conv_mha(c: TransformerConfigs):
    """
    ### Multi-DConv-Head Attention (MDHA) option
    """
    from labml_nn.transformers.primer_ez import MultiDConvHeadAttention
    return MultiDConvHeadAttention(c.n_heads, c.d_model, dropout_prob=c.dropout)
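# MDHA (from the Primer paper) adds a causal 3x1 depth-wise convolution along
# the sequence dimension to each of the per-head query, key and value
# projections. A rough sketch of that convolution (with `nn` = `torch.nn`),
# assuming an input shaped `[batch * heads, d_k, seq_len]`; the actual
# implementation lives in `labml_nn.transformers.primer_ez`:
#
#     conv = nn.Conv1d(d_k, d_k, kernel_size=3, padding=2, groups=d_k)
#     out = conv(x)[:, :, :-2]  # crop the right side to keep it causal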


@option(TransformerConfigs.encoder_attn, 'MultiDSharedConvHeadAttention')
def _d_shared_conv_mha(c: TransformerConfigs):
    """
    ### Multi-DSharedConv-Head Attention option
    """
    from labml_nn.transformers.primer_ez.variations import MultiDSharedConvHeadAttention
    return MultiDSharedConvHeadAttention(c.n_heads, c.d_model, dropout_prob=c.dropout)
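# A variation (not from the paper): as the name suggests, this presumably uses
# a single depth-wise convolution kernel shared across all heads and channels;
# see `labml_nn.transformers.primer_ez.variations` for the details.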


@option(TransformerConfigs.encoder_attn, 'MultiDPHConvHeadAttention')
def _d_per_head_conv_mha(c: TransformerConfigs):
    """
    ### Multi-DPHConv-Head Attention option
    """
    from labml_nn.transformers.primer_ez.variations import MultiDPHConvHeadAttention
    return MultiDPHConvHeadAttention(c.n_heads, c.d_model, dropout_prob=c.dropout)
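# Another variation (not from the paper): per the name, this presumably gives
# each head its own depth-wise convolution kernel; see
# `labml_nn.transformers.primer_ez.variations` for the details.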


def main():
    # Create experiment
    experiment.create(name="primer_ez")
    # Create configs
    conf = Configs()
    # Override configurations
    experiment.configs(conf, {
        # Use a character-level tokenizer
        'tokenizer': 'character',
        # Prompt separator is blank
        'prompt_separator': '',
        # Starting prompt for sampling
        'prompt': 'It is ',
        # Use the Tiny Shakespeare dataset
        'text': 'tiny_shakespeare',

        # Use a context size of 256
        'seq_len': 256,
        # Train for 128 epochs
        'epochs': 128,
        # Batch size of 32
        'batch_size': 32,
        # Switch between training and validation 10 times per epoch
        'inner_iterations': 10,

        # Model size
        'd_model': 512,
        'transformer.ffn.d_ff': 2048,

        # Use the Adam optimizer
        'optimizer.optimizer': 'Adam',
        'optimizer.learning_rate': 2.5e-4,
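
        # Use squared ReLU in the feed-forward network. This key is assumed to
        # match the `FeedForwardConfigs.activation` option registered above;
        # replace it with 'ReLU' to fall back to the baseline activation.
        'transformer.ffn.activation': 'SquaredReLU',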
        # Use Multi-DConv-Head Attention (MDHA) for the encoder self-attention
        'transformer.encoder_attn': 'MultiDConvHeadAttention',
    })
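    # Note: string values such as 'SquaredReLU' and 'MultiDConvHeadAttention'
    # are resolved by labml to the corresponding `@option`-registered functions
    # defined above.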

    # Set models for saving and loading
    experiment.add_pytorch_models({'model': conf.model})

    # Start the experiment
    with experiment.start():
        # Run training
        conf.run()


if __name__ == '__main__':
    main()