Train a Graph Attention Network v2 (GATv2) on the Cora dataset

import torch
from torch import nn

from labml import experiment
from labml.configs import option
from labml_nn.graphs.gat.experiment import Configs as GATConfigs
from labml_nn.graphs.gatv2 import GraphAttentionV2Layer

Graph Attention Network v2 (GATv2)

This graph attention network has two graph attention layers.
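
For context, the key difference from GAT is where the non-linearity sits in the attention score (notation follows the GATv2 paper; this summary is not part of the code below):

GAT: $e(\mathbf{h}_i, \mathbf{h}_j) = \text{LeakyReLU}\big(\mathbf{a}^\top [\mathbf{W} \mathbf{h}_i \, \Vert \, \mathbf{W} \mathbf{h}_j]\big)$

GATv2: $e(\mathbf{h}_i, \mathbf{h}_j) = \mathbf{a}^\top \text{LeakyReLU}\big(\mathbf{W}_l \mathbf{h}_i + \mathbf{W}_r \mathbf{h}_j\big)$

Because $\mathbf{a}$ is applied after the non-linearity, the ranking of attention scores can depend on the query node, which the paper calls dynamic attention. When share_weights is True, $\mathbf{W}_l = \mathbf{W}_r$.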

class GATv2(nn.Module):
  • in_features is the number of features per node
  • n_hidden is the number of features in the first graph attention layer
  • n_classes is the number of classes
  • n_heads is the number of heads in the graph attention layers
  • dropout is the dropout probability
  • share_weights if set to True, the same matrix will be applied to the source and the target node of every edge
    def __init__(self, in_features: int, n_hidden: int, n_classes: int, n_heads: int, dropout: float,
                 share_weights: bool = True):
        super().__init__()

First graph attention layer where we concatenate the heads

        self.layer1 = GraphAttentionV2Layer(in_features, n_hidden, n_heads,
                                            is_concat=True, dropout=dropout, share_weights=share_weights)

Activation function after first graph attention layer

        self.activation = nn.ELU()

Final graph attention layer where we average the heads

        self.output = GraphAttentionV2Layer(n_hidden, n_classes, 1,
                                            is_concat=False, dropout=dropout, share_weights=share_weights)

Dropout

        self.dropout = nn.Dropout(dropout)
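
As a rough shape sketch (the hyperparameter values here are illustrative, not the ones used in the experiment; for Cora, in_features is 1433 and n_classes is 7):

model = GATv2(in_features=1433, n_hidden=64, n_classes=7, n_heads=8, dropout=0.6)
# layer1: [n_nodes, 1433] -> 8 heads, concatenated -> [n_nodes, 64]
# output: [n_nodes, 64]   -> 1 head, averaged      -> [n_nodes, 7]
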
  • x is the feature vectors of shape [n_nodes, in_features]
  • adj_mat is the adjacency matrix of shape [n_nodes, n_nodes, n_heads] or [n_nodes, n_nodes, 1]
    def forward(self, x: torch.Tensor, adj_mat: torch.Tensor):

Apply dropout to the input

        x = self.dropout(x)

First graph attention layer

        x = self.layer1(x, adj_mat)

Activation function

        x = self.activation(x)

Dropout

        x = self.dropout(x)

Output layer (without activation) for logits

        return self.output(x, adj_mat)
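
A minimal sketch of a forward pass on toy data (all sizes are made up; adj_mat is a boolean adjacency matrix with a trailing singleton dimension so the same mask is broadcast to every head):

# 4 nodes with 16 features each
x = torch.randn(4, 16)
# Symmetric adjacency with self-loops on the diagonal
adj = torch.tensor([[1, 1, 0, 0],
                    [1, 1, 1, 0],
                    [0, 1, 1, 1],
                    [0, 0, 1, 1]], dtype=torch.bool)
adj_mat = adj.unsqueeze(-1)  # [n_nodes, n_nodes, 1]
model = GATv2(in_features=16, n_hidden=8, n_classes=3, n_heads=2, dropout=0.5)
logits = model(x, adj_mat)   # [n_nodes, n_classes]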

Configurations

Since the experiment is the same as the GAT experiment, but with the GATv2 model, we extend the same configs and change the model.

class Configs(GATConfigs):

Whether to share weights for source and target nodes of edges

    share_weights: bool = False

Set the model

    model: GATv2 = 'gat_v2_model'

Create GATv2 model

@option(Configs.model)
def gat_v2_model(c: Configs):
    return GATv2(c.in_features, c.n_hidden, c.n_classes, c.n_heads, c.dropout, c.share_weights).to(c.device)
def main():

Create configurations

    conf = Configs()

Create an experiment

    experiment.create(name='gatv2')

Calculate configurations.

    experiment.configs(conf, {

Adam optimizer

        'optimizer.optimizer': 'Adam',
        'optimizer.learning_rate': 5e-3,
        'optimizer.weight_decay': 5e-4,

        'dropout': 0.7,
    })

Start and watch the experiment

    with experiment.start():

Run the training

        conf.run()

if __name__ == '__main__':
    main()