GPT-NeoX Tokenizer

This initializes a Hugging Face tokenizer from the downloaded vocabulary.

13from tokenizers import Tokenizer
14
15from labml import lab, monit

Load NeoX Tokenizer

    Returns the tokenizer

@monit.func('Load NeoX Tokenizer')
def get_tokenizer() -> Tokenizer:
    """
    ### Load NeoX Tokenizer

    Builds the path to `20B_tokenizer.json` under the `neox/slim_weights`
    folder of the lab data path and loads a tokenizer from that file.

    :return: the tokenizer
    """
    # Location of the tokenizer definition inside the lab data directory
    tokenizer_path = (
        lab.get_data_path()
        / 'neox'
        / 'slim_weights'
        / '20B_tokenizer.json'
    )

    # `Tokenizer.from_file` is given the path as a string
    return Tokenizer.from_file(str(tokenizer_path))