13from tokenizers import Tokenizer
14
15from labml import lab, monit
@monit.func('Load NeoX Tokenizer')
def get_tokenizer() -> Tokenizer:
    """
    Load the NeoX tokenizer from the labml data directory.

    The tokenizer definition is read from
    `<data path>/neox/slim_weights/20B_tokenizer.json`, where `<data path>`
    is whatever `lab.get_data_path()` returns.

    :return: the `Tokenizer` built from that JSON file
    """
    # `Tokenizer.from_file` is given a plain string, so the path object is
    # converted explicitly before the call.
    tokenizer_json = lab.get_data_path() / 'neox' / 'slim_weights' / '20B_tokenizer.json'
    return Tokenizer.from_file(str(tokenizer_json))