Train Feedback Transformer

This trains a feedback transformer model for auto-regression. You can pick the original feedback transformer or the new version where the keys and values are precalculated.

Here's a Colab notebook for training a feedback transformer on the Tiny Shakespeare dataset.

Open In Colab

18import torch
19from labml import experiment
20from labml.configs import option
21from labml.utils.pytorch import get_modules
22from labml_nn.experiments.nlp_autoregression import NLPAutoRegressionConfigs
23from torch import nn

Autoregressive model

class AutoregressiveModel(nn.Module):
    """
    ## Autoregressive model

    Wraps a transformer module with a token embedding in front of it and a
    linear projection to vocabulary logits after it.
    """

    def __init__(self, n_vocab: int, d_model: int, transformer: nn.Module):
        """
        * `n_vocab` is the number of tokens in the vocabulary
        * `d_model` is the embedding size; it is also the input feature size
          of the final linear layer
        * `transformer` is the module applied to the embedded tokens
        """
        super().__init__()
        # Token embedding module
        self.src_embed = nn.Embedding(n_vocab, d_model)
        self.transformer = transformer
        # Projects the transformer output to one logit per vocabulary token
        self.generator = nn.Linear(d_model, n_vocab)

    def forward(self, x: torch.Tensor):
        """
        Return a tuple `(logits, None)` for the token indices `x`.

        NOTE(review): the second element is always `None`; presumably a
        placeholder for a state/memory slot the training loop unpacks -
        confirm against the caller.
        """
        # Embed the tokens
        x = self.src_embed(x)
        # Run it through the transformer
        res = self.transformer(x)
        # Generate logits of the next token
        return self.generator(res), None

Configurations

The default configs can and will be overridden when we start the experiment

class Configs(NLPAutoRegressionConfigs):
    """
    ## Configurations

    The default configs can and will be overridden when we start the experiment.
    """

    # The model to train; built by one of the `@option(Configs.model)` functions
    model: AutoregressiveModel

    # Embedding size; also passed to the attention, feed-forward and
    # transformer layer constructors
    d_model: int = 512
    # Number of attention heads
    heads: int = 8
    # Dropout probability passed to the attention, feed-forward and
    # transformer layer constructors
    dropout: float = 0.0
    # Second size argument of the `FeedForward` module
    # (presumably its hidden layer width - confirm)
    d_ff: int = 2048
    # Number of transformer layers
    n_layers: int = 6
@option(Configs.model)
def feedback_transformer(c: Configs):
    """
    Create the original feedback transformer.

    Registered as the `'feedback_transformer'` option for `Configs.model`.
    Returns an `AutoregressiveModel` moved to `c.device`.
    """
    from labml_nn.transformers.feedback import FeedbackTransformer, FeedbackTransformerLayer, \
        FeedbackAttention, FeedForward

    # Sub-modules are created in the same order as they would be evaluated
    # in a single nested expression, so parameter initialization order
    # (and therefore seeded random initialization) is unchanged.
    attn = FeedbackAttention(c.heads, c.d_model, c.dropout)
    feed_forward = FeedForward(c.d_model, c.d_ff, c.dropout)
    layer = FeedbackTransformerLayer(d_model=c.d_model,
                                     attn=attn,
                                     feed_forward=feed_forward,
                                     dropout_prob=c.dropout)
    transformer = FeedbackTransformer(layer, c.n_layers)

    return AutoregressiveModel(c.n_tokens, c.d_model, transformer).to(c.device)

Create updated feedback transformer, with precalculated keys and values.

@option(Configs.model)
def feedback_transformer_kv(c: Configs):
    """
    Create the updated feedback transformer, with precalculated keys and values.

    Registered as the `'feedback_transformer_kv'` option for `Configs.model`.
    Returns an `AutoregressiveModel` moved to `c.device`.
    """
    from labml_nn.transformers.feedback import FeedbackTransformerKV, FeedbackTransformerLayer, \
        FeedbackAttention, FeedForward

    # Sub-modules are created in the same order as they would be evaluated
    # in a single nested expression, so parameter initialization order
    # (and therefore seeded random initialization) is unchanged.
    attn = FeedbackAttention(c.heads, c.d_model, c.dropout,
                             is_kv_precomputed=True)
    feed_forward = FeedForward(c.d_model, c.d_ff, c.dropout)
    layer = FeedbackTransformerLayer(d_model=c.d_model,
                                     attn=attn,
                                     feed_forward=feed_forward,
                                     dropout_prob=c.dropout)
    transformer = FeedbackTransformerKV(layer, c.n_layers, c.d_model, c.heads)

    return AutoregressiveModel(c.n_tokens, c.d_model, transformer).to(c.device)
def main():
    """
    Create the `feedback_transformer` experiment, apply the configuration
    overrides, register the models for checkpointing and run training.
    """
    # Create experiment
    experiment.create(name="feedback_transformer")
    # Create configs
    conf = Configs()
    # Load configurations
    experiment.configs(conf,
                       # A dictionary of configurations to override
                       {'tokenizer': 'character',
                        'text': 'tiny_shakespeare',
                        # NOTE(review): presumably a scale factor for the Noam
                        # schedule rather than an absolute rate - confirm
                        'optimizer.learning_rate': 1.0,
                        'optimizer.optimizer': 'Noam',
                        'prompt': 'It is',
                        'prompt_separator': '',

                        # Use `feedback_transformer` for original feedback transformer
                        'model': 'feedback_transformer_kv',

                        'train_loader': 'shuffled_train_loader',
                        'valid_loader': 'shuffled_valid_loader',

                        'seq_len': 128,
                        'epochs': 128,
                        'batch_size': 64,
                        'inner_iterations': 25})

    # Set models for saving and loading
    experiment.add_pytorch_models(get_modules(conf))

    # Start the experiment
    with experiment.start():
        # Run the training loop
        conf.run()


# Run the experiment only when this file is executed as a script
if __name__ == '__main__':
    main()