# 位置前馈网络 (FFN)

FFN 由两个全连接层组成。隐藏层的维度通常设置为令牌嵌入维度的四倍左右，因此它有时也被称为扩张-收缩网络。

### 门控线性单元

38import torch
39from torch import nn as nn
40
41from labml_helpers.module import Module

## FFN 模块

class FeedForward(Module):
    """
    Position-wise feed-forward network (FFN) module.

    Computes `layer2(dropout(activation(layer1(x))))`, optionally gating the
    hidden layer GLU-style: `activation(layer1(x)) * linear_v(x)`.
    """

    def __init__(self, d_model: int, d_ff: int,
                 dropout: float = 0.1,
                 activation=nn.ReLU(),
                 is_gated: bool = False,
                 bias1: bool = True,
                 bias2: bool = True,
                 bias_gate: bool = True):
        """
        * `d_model` is the number of features in a token embedding
        * `d_ff` is the number of features in the hidden layer of the FFN
        * `dropout` is the dropout probability for the hidden layer
        * `is_gated` specifies whether the hidden layer is gated
        * `bias1` specifies whether the first fully connected layer should have a learnable bias
        * `bias2` specifies whether the second fully connected layer should have a learnable bias
        * `bias_gate` specifies whether the fully connected layer for the gate should have a learnable bias
        """
        super().__init__()
        # Expansion: d_model -> d_ff (first fully connected layer)
        self.layer1 = nn.Linear(d_model, d_ff, bias=bias1)
        # Contraction: d_ff -> d_model (second fully connected layer)
        self.layer2 = nn.Linear(d_ff, d_model, bias=bias2)
        # Dropout applied to the hidden representation
        self.dropout = nn.Dropout(dropout)
        # Activation function f (defaults to ReLU)
        self.activation = activation
        # Whether there is a gate (GLU variants)
        self.is_gated = is_gated
        if is_gated:
            # The gate path: a parallel linear projection of the input,
            # multiplied element-wise with the activated hidden layer
            self.linear_v = nn.Linear(d_model, d_ff, bias=bias_gate)

    def forward(self, x: torch.Tensor):
        # f(x W1 + b1)
        g = self.activation(self.layer1(x))
        # If gated: f(x W1 + b1) ⊙ (x V + b_V); otherwise just the activation
        if self.is_gated:
            x = g * self.linear_v(x)
        else:
            x = g
        # Apply dropout before the contracting layer
        x = self.dropout(x)
        # (hidden) W2 + b2
        return self.layer2(x)