Cyleux
/

danielclone2

Model card Files Files and versions

danielclone2 / verify.py

Cyleux's picture

Upload 18 files

8cea444 almost 3 years ago

history blame contribute delete

3.53 kB

	########################################################################################################
	# The RWKV Language Model - https://github.com/BlinkDL/RWKV-LM
	########################################################################################################

	# This script verifies that the outputs of the different model implementations (the RWKV training model and RWKV_RNN) agree with each other.

	import os, sys, types
	import numpy as np
	import torch
	# Compact, wide numpy printing so the printed outputs are easy to compare by eye.
	np.set_printoptions(precision=4, suppress=True, linewidth=200)

	# Optional first command-line argument: value for CUDA_VISIBLE_DEVICES.
	try:
	    os.environ["CUDA_VISIBLE_DEVICES"] = sys.argv[1]
	except IndexError:
	    # No argument supplied: keep whatever CUDA_VISIBLE_DEVICES was inherited.
	    pass

	torch.backends.cudnn.benchmark = True
	# TF32 is switched off for both cuDNN and cuBLAS matmuls
	# (presumably so reduced-precision math does not blur the comparison).
	torch.backends.cudnn.allow_tf32 = False
	torch.backends.cuda.matmul.allow_tf32 = False

	# Run configuration, published through environment variables.
	os.environ['RWKV_FLOAT_MODE'] = 'bf16' # bf16 or fp32
	os.environ['RWKV_RUN_DEVICE'] = 'cuda' # currently model_train requires CUDA
	RUN_DEVICE = os.environ['RWKV_RUN_DEVICE']

	TOKEN_MODE = 'pile'

	# Tokenizer files, checkpoint path (without the '.pth' suffix) and model
	# hyper-parameters for the selected token mode.
	if TOKEN_MODE == 'pile':
	    WORD_NAME = ['20B_tokenizer.json', '20B_tokenizer.json']
	    MODEL_NAME = '/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-3b/RWKV-4-Pile-3B-20221003-6783'
	    n_layer = 32
	    n_embd = 2560
	    ctx_len = 1024
	    UNKNOWN_CHAR = None
	else:
	    # Fail fast: without this, the names above stay undefined and the
	    # script dies later with an unrelated-looking NameError.
	    raise ValueError(f'unsupported TOKEN_MODE: {TOKEN_MODE!r}')

	from src.utils import TOKENIZER

	# Build the tokenizer from the configured vocabulary files.
	tokenizer = TOKENIZER(WORD_NAME, UNKNOWN_CHAR=UNKNOWN_CHAR)

	# In 'pile' mode the vocabulary size is pinned to 50277.
	if TOKEN_MODE == 'pile':
	    tokenizer.vocab_size = 50277

	########################################################################################################

	# These variables are set before the src.* imports below
	# (presumably the modules read them at import time - confirm before reordering).
	os.environ["RWKV_JIT_ON"] = "1"
	os.environ["RWKV_T_MAX"] = str(ctx_len)

	from src.model_run import RWKV_RNN
	from src.model import RWKV

	# Settings namespace shared by the training-style model and, later, the RNN model.
	args = types.SimpleNamespace(
	    vocab_size=tokenizer.vocab_size,
	    ctx_len=ctx_len,
	    n_embd=n_embd,
	    n_layer=n_layer,
	    head_qk=0,
	    pre_ffn=0,
	    grad_cp=0,
	    my_pos_emb=0,
	)

	# Instantiate the training-style model and move it to the run device.
	model_train = RWKV(args).to(RUN_DEVICE)

	def _cast_to_float_mode(model):
	    """Return ``model`` cast to the dtype selected by RWKV_FLOAT_MODE.

	    'fp16' casts to half precision and 'bf16' to bfloat16; any other
	    value returns the model unchanged.
	    """
	    mode = os.environ['RWKV_FLOAT_MODE']
	    if mode == 'fp16':
	        return model.half()
	    if mode == 'bf16':
	        return model.bfloat16()
	    return model


	# Cast once before the checkpoint is loaded ...
	model_train = _cast_to_float_mode(model_train)

	print('loading ' + MODEL_NAME)
	# NOTE(review): torch.load unpickles the file - only load checkpoints from a trusted source.
	m2 = torch.load(MODEL_NAME + '.pth', map_location='cpu')
	model_train.load_state_dict(m2)

	# ... and once more after the weights are in place.
	model_train = _cast_to_float_mode(model_train)

	# Extra fields passed to the RNN-style model through the shared namespace.
	args.MODEL_NAME = MODEL_NAME
	args.RUN_DEVICE = RUN_DEVICE
	args.FLOAT_MODE = os.environ['RWKV_FLOAT_MODE']
	model_rnn = RWKV_RNN(args)

	########################################################################################################

	# Prompt fed to both models. Shorter alternative prompt:
	# context = '\nIn a'
	context = '\nIn a shocking finding, scientist discovered a herd of dragons living in a remote, previously unexplored valley, in Tibet. Even more surprising to the researchers was the fact that the dragons spoke perfect Chinese.'

	print(f"\nVerifying {os.environ['RWKV_RUN_DEVICE']} {os.environ['RWKV_FLOAT_MODE']}")

	# Encode the prompt into token ids; only the 'pile' tokenizer path is defined here.
	if TOKEN_MODE == 'pile':
	    ctx = tokenizer.tokenizer.encode(context)
	print(f'input len {len(ctx)} data {ctx}')

	########################################################################################################

	# Run both implementations on the same tokens with autograd disabled and
	# print their outputs for comparison.
	with torch.no_grad():
	    # Training-style model: the whole sequence in a single forward call.
	    print('\nRWKV-train output')
	    batch = torch.tensor([ctx]).to(RUN_DEVICE)
	    train_result = model_train.forward(batch)[0]
	    out = train_result.detach().cpu().float().numpy()
	    print(out, '\n')

	    # RNN-style model: called once per position with the token prefix and
	    # the state returned by the previous call.
	    print('\nRWKV-RNN output')
	    state = None
	    out = None
	    src_len = len(ctx)
	    for step in range(src_len):
	        prefix = ctx[:step + 1]
	        out, state = model_rnn.forward(prefix, state)
	        # Only the first three and the last three positions are printed.
	        near_edge = step < 3 or step >= src_len - 3
	        if near_edge:
	            # NOTE(review): unlike the train path there is no .float() before
	            # .numpy() here - assumes model_rnn.forward returns a numpy-compatible dtype; confirm.
	            print(out.detach().cpu().numpy())
	        if step == 2:
	            print('...')