Spaces:

OpenDILabCommunity
/

gomoku

Sleeping

App Files Files Community

gomoku / DI-engine /ding /hpc_rl /tests /test_upgo.py

zjowowen

init space

3dfe8fb over 1 year ago

raw

history blame contribute delete

4.38 kB

	import time
	import torch
	from hpc_rll.origin.upgo import upgo_loss
	from hpc_rll.rl_utils.upgo import UPGO
	from testbase import mean_relative_error, times

	# Both tests below move every tensor and the UPGO module to the GPU when
	# ``use_cuda`` is set, so stop right away if no CUDA device is available.
	assert torch.cuda.is_available()
	use_cuda = True

	# Problem size shared by both tests: inputs are shaped (T, B, N), (T, B) and
	# (T + 1, B), and action indices are drawn from [0, N).
	T, B, N = 256, 256, 256


	def upgo_val():
	    """Compare the HPC ``UPGO`` module against the reference ``upgo_loss``.

	    Both implementations are fed identical random inputs. The mean relative
	    error of the averaged loss (forward pass) and of the gradient with respect
	    to the target output (backward pass) is printed; nothing is returned.
	    """

	    def _to_numpy(tensor):
	        # Conversion chain applied to every tensor handed to mean_relative_error.
	        return torch.flatten(tensor).cpu().detach().numpy()

	    # Sampling order is kept fixed so a seeded run draws the same tensors.
	    target_output = torch.randn(T, B, N)
	    rhos = torch.randn(T, B)
	    action = torch.randint(0, N, size=(T, B))
	    rewards = torch.randn(T, B)
	    bootstrap_values = torch.randn(T + 1, B)

	    # Argument order matches the call signature used for both implementations.
	    ref_args = [target_output, rhos, action, rewards, bootstrap_values]
	    # Detached copies, so each implementation owns its own leaf tensor and .grad.
	    hpc_args = [arg.clone().detach() for arg in ref_args]
	    hpc_upgo = UPGO(T, B, N)

	    if use_cuda:
	        ref_args = [arg.cuda() for arg in ref_args]
	        hpc_args = [arg.cuda() for arg in hpc_args]
	        hpc_upgo = hpc_upgo.cuda()

	    # Reference implementation: forward, mean-reduce, backward.
	    ref_target_output = ref_args[0]
	    ref_target_output.requires_grad_(True)
	    ref_loss = upgo_loss(*ref_args).mean()
	    ref_loss.backward()
	    if use_cuda:
	        torch.cuda.synchronize()

	    # HPC implementation: same steps on the copied inputs.
	    hpc_target_output = hpc_args[0]
	    hpc_target_output.requires_grad_(True)
	    hpc_loss = hpc_upgo(*hpc_args).mean()
	    hpc_loss.backward()
	    if use_cuda:
	        torch.cuda.synchronize()

	    fp_mre = mean_relative_error(_to_numpy(ref_loss), _to_numpy(hpc_loss))
	    print("upgo fp mean_relative_error: " + str(fp_mre))

	    bp_mre = mean_relative_error(
	        _to_numpy(ref_target_output.grad),
	        _to_numpy(hpc_target_output.grad),
	    )
	    print("upgo bp mean_relative_error: " + str(bp_mre))


	def upgo_perf():
	    """Time forward + backward of the reference ``upgo_loss`` and the HPC ``UPGO``.

	    Each implementation runs ``times`` iterations on identical random inputs,
	    and the elapsed time of every iteration is printed. Nothing is returned.

	    Durations are measured with ``time.perf_counter()``, a monotonic
	    high-resolution clock, so the numbers are not disturbed by system clock
	    adjustments the way ``time.time()`` readings can be.
	    """

	    def _run_timed(message, loss_fn, args):
	        # One forward + mean + backward per iteration. ``.grad`` is not cleared
	        # between iterations, so gradients accumulate; only the cost matters here.
	        for i in range(times):
	            start = time.perf_counter()
	            loss_fn(*args).mean().backward()
	            if use_cuda:
	                # Wait for the queued GPU work so the reading covers the full step.
	                torch.cuda.synchronize()
	            print(message.format(i, time.perf_counter() - start))

	    # Sampling order is kept fixed so a seeded run draws the same tensors.
	    target_output = torch.randn(T, B, N)
	    rhos = torch.randn(T, B)
	    action = torch.randint(0, N, size=(T, B))
	    rewards = torch.randn(T, B)
	    bootstrap_values = torch.randn(T + 1, B)

	    # Argument order matches the call signature used for both implementations.
	    ref_args = [target_output, rhos, action, rewards, bootstrap_values]
	    # Detached copies, so each implementation owns its own leaf tensor and .grad.
	    hpc_args = [arg.clone().detach() for arg in ref_args]
	    hpc_upgo = UPGO(T, B, N)

	    if use_cuda:
	        ref_args = [arg.cuda() for arg in ref_args]
	        hpc_args = [arg.cuda() for arg in hpc_args]
	        hpc_upgo = hpc_upgo.cuda()

	    ref_args[0].requires_grad_(True)
	    _run_timed('epoch: {}, original upgo cost time: {}', upgo_loss, ref_args)

	    hpc_args[0].requires_grad_(True)
	    _run_timed('epoch: {}, hpc upgo cost time: {}', hpc_upgo, hpc_args)


	# Script entry point: report the problem size, then run the accuracy check
	# followed by the timing run.
	if __name__ == '__main__':
	    print(
	        "target problem: T = {}, B = {}, N = {}".format(T, B, N),
	        "================run upgo validation test================",
	        sep="\n",
	    )
	    upgo_val()

	    print("================run upgo performance test================")
	    upgo_perf()