from labml.configs import BaseConfigs
This defines configurations for a transformer. The configurations are calculated using option functions. These are lazily loaded, so only the necessary options are computed.
class RWKVConfigs(BaseConfigs):
Number of attention heads
    n_heads: int = 8
Transformer embedding size
    d_model: int = 512
Number of layers
    n_layers: int = 6
Dropout probability
    dropout: float = 0.1
Number of tokens in the source vocabulary (for token embeddings)
    n_src_vocab: int
Number of tokens in the target vocabulary (to generate logits for prediction)
    n_tgt_vocab: int
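The lazy option-function idea can be sketched in plain Python, without the labml dependency (the class and attribute names below are illustrative, not part of the original configs): a derived value such as the per-head dimension is computed only when it is first accessed, from whatever values are set at that point.

```python
import functools


class LazyConfigs:
    """Illustrative stand-in for lazily computed config options:
    each derived value is evaluated only on first access, then cached."""
    d_model: int = 512
    n_heads: int = 8

    @functools.cached_property
    def d_head(self) -> int:
        # Evaluated only when d_head is first read; result is cached
        # on the instance, so later reads do no recomputation.
        return self.d_model // self.n_heads


configs = LazyConfigs()
# Nothing is computed until the attribute is accessed.
print(configs.d_head)  # 512 // 8 = 64
```

labml's `option` decorator generalizes this pattern: an option function declares which config it computes, and the framework resolves the dependency graph on demand.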