#

GPT-NeoX でテキストを生成

ここでは、単一の GPU 上で GPT-NeoX を使ってテキストを生成する方法を示します。

これには、45 GB 以上のメモリを搭載した GPU が必要です。

#

インポート

16from typing import List
17
18import torch
19from torch import nn
20
21from labml import monit
22from labml_nn.neox.model import LayerGenerator
23from labml_nn.neox.utils import get_tokens, print_tokens
24from labml_nn.neox.utils.cache import get_cache

#

ロードするレイヤーのリスト。これはテスト用です。`{0, 1}` のようにレイヤーのサブセットを指定すると、先頭側のトランスフォーマーレイヤーのみを読み込むことができます。

29LAYERS = None

#

補完の対象となるプロンプト

32PROMPT = 'Einstein was born in the German Empire, but moved to Switzerland in 1895, forsaking his German'

#

次のトークンを予測

- `model` はモデル
- `ids` は入力トークン ID
- `device` はモデルを配置するデバイス

35def infer(model: nn.Module, ids: List[int], device: torch.device):

#

44    with torch.no_grad():

#

トークンを取得

46        x = torch.tensor(ids)[None, :].to(device)

#

モデルを評価

48        x = model(x)

#

予測トークンを返す

51    return x[0].max(dim=-1)[1].tolist()

#

テキストを生成

54def generate():

#

生成を高速化するために中間キーと値のペアをキャッシュするようにキャッシュを設定

60    cache = get_cache()
61    cache.set('use_cache', True)

#

デバイス

64    device = torch.device('cuda:0')

#

レイヤーをロード

67    layers = list(LayerGenerator(is_clone_layers=True,
68                                 filter_layers=LAYERS,
69                                 dtype=torch.float16,
70                                 device=device,
71                                 ).load())
72
73    model = nn.Sequential(*layers)

#

トークン ID を取得

76    ids = get_tokens(PROMPT)

#

モデルを実行

79    cache.set('state_ids', (None, 1))
80    with monit.section('Infer'):
81        next_token = infer(model, ids, device)[-1]

#

予測トークンを追加

84    ids += [next_token]

#

トークンを100個予測する

87    for i in range(1, 100):

#

キャッシュされたアクティベーションを使用するように状態を設定します

89        cache.set('state_ids', (i, i + 1))

#

次のトークンを取得します。以前のトークンのキーと値のペアはキャッシュされているため、最後のトークンのみをモデルに入力することに注意してください。

92        with monit.section('Infer'):
93            next_token = infer(model, [next_token], device)[-1]

#

予測トークンを追加

95        ids += [next_token]

#

結果を表示

97        print_tokens(ids, [ids])

#

101if __name__ == '__main__':
102    generate()