#

Build FAISS index for k-NN search

We want to build the index of $(f (c_{i}), w_{i})$. We store $f (c_{i})$ and $w_{i}$ in memory-mapped numpy arrays. We find the $f (c_{i})$ nearest to $f (c_{t})$ using FAISS: FAISS indexes $(f (c_{i}), i)$, and we query it with $f (c_{t})$.

15from typing import Optional
16
17import faiss
18import numpy as np
19import torch
20
21from labml import experiment, monit, lab
22from labml.utils.pytorch import get_modules
23from labml_nn.transformers.knn.train_model import Configs

#

Load a saved experiment that was created by the model training script (`train_model`).

def load_experiment(run_uuid: str, checkpoint: Optional[int] = None):
    """
    Restore a previously trained run so that it can be evaluated.

    :param run_uuid: UUID of the run to load
    :param checkpoint: checkpoint to load; handed to `experiment.load` as is
    :return: the `Configs` object, after the experiment has been started
    """
    # Fresh configurations object that the saved settings are applied to
    configs = Configs()
    # Custom configurations that were used in the saved run
    saved_overrides = experiment.load_configs(run_uuid)
    # Ask for the inputs to the feed forward layer, $f(c_i)$, to be saved
    saved_overrides['is_save_ff_input'] = True

    # Evaluation only; i.e. nothing is tracked or saved
    experiment.evaluate()
    # Initialize the configurations with the saved (and overridden) values
    experiment.configs(configs, saved_overrides)
    # Register the models for saving/loading
    experiment.add_pytorch_models(get_modules(configs))
    # Point the experiment at the run (and checkpoint) to load from
    experiment.load(run_uuid, checkpoint)

    # Starting the experiment is the step that actually loads the models
    experiment.start()

    return configs

#

Gather $(f (c_{i}), w_{i})$ and save them in numpy arrays

Note that these numpy arrays will take up a lot of space (even a few hundred gigabytes) depending on the size of your dataset.

def gather_keys(conf: Configs):
    """
    Gather $(f(c_i), w_i)$ and save them in memory-mapped numpy arrays.

    Runs the model over the training data loader with gradients disabled and
    writes the feed forward layer inputs $f(c_i)$ to `keys.npy` and the
    target labels $w_i$ to `vals.npy`, both under `lab.get_data_path()`.

    Note that these arrays can take up a lot of space (even a few hundred
    gigabytes) depending on the size of the dataset.

    :param conf: configurations of the loaded experiment; `conf.model` must
        expose `ff_input` after a forward pass
    """
    # Dimensions of $f(c_i)$
    d_model = conf.transformer.d_model
    # Training data loader
    data_loader = conf.trainer.data_loader
    # Number of contexts; i.e. number of tokens in the training data minus one.
    # $(f(c_i), w_i)$ for $i \in [2, T]$
    n_keys = data_loader.data.shape[0] * data_loader.data.shape[1] - 1
    # Numpy array for $f(c_i)$
    keys_store = np.memmap(str(lab.get_data_path() / 'keys.npy'), dtype=np.float32, mode='w+', shape=(n_keys, d_model))
    # Numpy array for $w_i$.
    # `np.int` was only an alias of the builtin `int`; it was deprecated in
    # NumPy 1.20 and removed in 1.24. The builtin gives the identical dtype
    # and works on every NumPy version.
    vals_store = np.memmap(str(lab.get_data_path() / 'vals.npy'), dtype=int, mode='w+', shape=(n_keys, 1))

    # Number of keys $f(c_i)$ collected
    added = 0
    with torch.no_grad():
        # Loop through data; the batch index is not needed
        for _, batch in monit.enum("Collect data", data_loader, is_children_silent=True):
            # $w_i$ the target labels
            vals = batch[1].view(-1, 1)
            # Input data moved to the device of the model
            data = batch[0].to(conf.device)
            # Run the model; only the saved feed forward input is used
            _ = conf.model(data)
            # Get $f(c_i)$
            keys = conf.model.ff_input.view(-1, d_model)
            # Number of keys produced by this batch
            n_batch = keys.shape[0]
            # Save keys, $f(c_i)$ in the memory mapped numpy array
            keys_store[added: added + n_batch] = keys.cpu()
            # Save values, $w_i$ in the memory mapped numpy array
            # (moved to the CPU like the keys; a no-op if already there)
            vals_store[added: added + n_batch] = vals.cpu()
            # Increment the number of collected keys
            added += n_batch

#