Evaluate k-nearest neighbor language model

11from typing import Optional, List
13import faiss
14import numpy as np
15import torch
17from labml import monit, lab
18from labml.logger import inspect
19from labml_nn.transformers.knn.train_model import Configs

$k$-NN to get $p(w_t, c_t)$

Here we refer to $f(c_t)$ as queries, $f(c_i)$ as keys and $w_i$ as values.

def knn(queries: torch.Tensor, index: "faiss.IndexFlatL2", keys_store: np.ndarray, vals_store: np.ndarray,
        n_tokens: int, n_neighbors: int = 10):
    """
    ## $k$-NN token logits

    Look up the nearest stored keys for every query vector and turn them into
    per-token scores: each neighbour contributes its cosine similarity with the
    query to the token stored as its value.

    :param queries: CPU tensor whose last dimension is the embedding size
        (it is converted with `.numpy()`); all leading dimensions are kept in the result
    :param index: FAISS index; only its `search(x, k)` method is used
    :param keys_store: array of key vectors, indexed by the ids returned by `index`
    :param vals_store: array of token ids with shape `[n_keys, 1]`, aligned with `keys_store`
    :param n_tokens: vocabulary size, i.e. the size of the last dimension of the result
    :param n_neighbors: number of nearest neighbours to retrieve per query
    :return: tensor of shape `[*queries.shape[:-1], n_tokens]`
    """

    # Save shape of queries to reshape results
    queries_shape = queries.shape

    # Flatten all leading (batch and sequence) dimensions of queries
    flat_queries = queries.detach().reshape(-1, queries_shape[-1])

    # Find the nearest neighbours of the queries among the keys.
    # `idx` holds the positions of the neighbours in `keys_store`;
    # the distances reported by FAISS are not needed.
    _, idx = index.search(flat_queries.numpy(), n_neighbors)

    # FAISS pads the result with `-1` when it finds fewer than `n_neighbors` neighbours.
    # Indexing the stores with `-1` would silently pick the last entry, so remember
    # which slots are real and point the padded ones at a valid (ignored) row.
    found_np = idx >= 0
    idx = np.where(found_np, idx, 0)
    found = torch.from_numpy(found_np)

    # Get the keys of the neighbours
    keys_found = flat_queries.new_tensor(keys_store[idx])
    # Get the values (token ids) of the neighbours; scatter needs `int64` indexes
    vals_found = torch.tensor(vals_store[idx]).squeeze(-1).long()

    # We are going to calculate the cosine similarity between normalized vectors

    # Normalize the keys
    keys_found_n = keys_found / torch.sqrt((keys_found ** 2).sum(-1, keepdim=True) + 1e-10)
    # Normalize the queries
    queries_n = flat_queries / torch.sqrt((flat_queries ** 2).sum(-1, keepdim=True) + 1e-10)

    # Get the dot-product, or cosine similarity
    dot_prod = (keys_found_n * queries_n.unsqueeze(1)).sum(-1)
    # Padded slots must not contribute to any token
    dot_prod = dot_prod.masked_fill(~found, 0.)

    # Token-wise logits
    logits_token = dot_prod.new_zeros(flat_queries.shape[0], n_tokens)
    # Scatter and accumulate token logits based on the nearest neighbors
    logits_token.scatter_add_(1, vals_found, dot_prod)

    # Reshape the logits back to the leading dimensions of the queries
    return logits_token.reshape(*queries_shape[:-1], n_tokens)

Calculate validation loss

We calculate the validation loss of the combination of the $k$-NN prediction and the transformer prediction. The weight given to the $k$-NN model is given by `knn_weights`. It is a list of weights, and we calculate the validation loss for each of them.

def validation_loss(knn_weights: List[float], last_n: Optional[int], conf: Configs, index: faiss.IndexFlatL2,
                    keys_store: np.ndarray, vals_store: np.ndarray):
    """
    ## Calculate validation loss

    Evaluate the loss of the interpolation `c * knn + (1 - c) * transformer`
    on the validation data for every weight `c` in `knn_weights`.

    :param knn_weights: weights given to the $k$-NN prediction
    :param last_n: if set (and non-zero), only the last `last_n` entries along the
        first dimension of the predictions and targets are scored
    :param conf: experiment configurations; provides the model, validation data loader,
        loss function, device and vocabulary size
    :param index: FAISS index passed through to `knn`
    :param keys_store: key vectors passed through to `knn`
    :param vals_store: token ids passed through to `knn`
    :return: `(losses, n_samples)` where `losses[i]` is a list with one entry per batch holding
        the batch loss for `knn_weights[i]` multiplied by the number of samples in that batch
        (as a Python float), and `n_samples` lists the number of samples in each batch
    """

    # List of losses for each `knn_weights`
    losses = [[] for _ in knn_weights]
    # Number of samples in each batch
    n_samples = []
    with torch.no_grad():
        # Iterate through validation data
        for _, batch in monit.enum("Validation", conf.validator.data_loader, is_children_silent=True):
            # Get data and target labels
            data, target = batch[0].to(conf.device), batch[1].to(conf.device)
            # Run the model and get predictions
            res = conf.model(data)
            # Get $k$-NN predictions; the model exposes the embeddings of the
            # forward pass it just ran as `ff_input`
            res_knn = knn(conf.model.ff_input.cpu(), index, keys_store, vals_store, conf.n_tokens)
            res_knn = res_knn.to(conf.device)

            # This is to calculate only the loss for `last_n` tokens.
            # This is important because the first predictions (along the sequence)
            # of transformer model has very few past tokens to look at.
            # NOTE(review): slicing the first dimension assumes sequence-first tensors - confirm.
            if last_n:
                res = res[-last_n:]
                res_knn = res_knn[-last_n:]
                target = target[-last_n:]

            # Number of samples
            n_s = res.shape[0] * data.shape[1]
            n_samples.append(n_s)

            # Calculate scores for each of `knn_weights`.
            for weight_idx, c in enumerate(knn_weights):
                # Calculate the loss
                loss = conf.loss_func(res_knn * c + (1 - c) * res, target)
                # Store a plain float so that the results do not keep (GPU) tensors alive
                # and can be summed with NumPy; assumes the loss is a scalar.
                losses[weight_idx].append(float(loss) * n_s)

    return losses, n_samples

Load the index

def load_index(conf: Configs, n_probe: int = 8):
    """
    ## Load the index

    Read the FAISS index and open the memory mapped key and value stores
    from the experiment data path.

    :param conf: experiment configurations; provides the model dimension and the training data
    :param n_probe: value assigned to the `nprobe` attribute of the loaded index
    :return: `(index, keys_store, vals_store)`
    """

    # Dimensionality of the stored key vectors
    d_model = conf.transformer.d_model
    # Training data loader
    data_loader = conf.trainer.data_loader
    # Number of contexts; i.e. number of tokens in the training data minus one.
    n_keys = data_loader.data.shape[0] * data_loader.data.shape[1] - 1

    # All files live in the same data folder
    data_path = lab.get_data_path()

    # Load FAISS index
    with monit.section('Load index'):
        index = faiss.read_index(str(data_path / 'faiss.index'))
    # Set number of cells to probe
    index.nprobe = n_probe

    # Load memory mapped numpy arrays.
    # `np.int` was an alias of the builtin `int` and has been removed from NumPy;
    # `int` is the drop-in replacement. It must match the dtype the file was written with.
    keys_store = np.memmap(str(data_path / 'keys.npy'), dtype=np.float32, mode='r', shape=(n_keys, d_model))
    vals_store = np.memmap(str(data_path / 'vals.npy'), dtype=int, mode='r', shape=(n_keys, 1))

    return index, keys_store, vals_store
def main():
    """
    Evaluate the validation loss of a trained model for a range of $k$-NN weights
    and print the average loss per sample for each weight.
    """
    from labml_nn.transformers.knn.build_index import load_experiment
    # Load the experiment. Replace the run uuid with you run uuid from
    # training the model.
    conf = load_experiment('4984b85c20bf11eb877a69c1a03717cd')
    # Set model to evaluation mode
    conf.model.eval()

    # Load index
    index, keys_store, vals_store = load_index(conf)
    # List of weights given to $k$-NN prediction. We will evaluate the validation loss for
    # each of the weights
    knn_weights = [i / 20 for i in range(10)]
    # Evaluate validation loss
    losses, n_samples = validation_loss(knn_weights, None, conf, index, keys_store, vals_store)

    # Total number of samples; the same for every weight
    total_samples = np.sum(n_samples)
    # Output the losses for each of `knn_weights`.
    # Each entry is converted with `float` first so that tensors (including ones
    # that live on a GPU) can be summed with NumPy.
    inspect({c: np.sum([float(batch_loss) for batch_loss in weight_losses]) / total_samples
             for c, weight_losses in zip(knn_weights, losses)})


if __name__ == '__main__':
    main()