Spaces:
Sleeping
Sleeping
File size: 2,209 Bytes
44db2f9 676caef 44db2f9 676caef 62c6c3b 44db2f9 abff1ef 62c6c3b f05ece6 1bd428f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
"""
Functions that use multiple times
"""
import os
from torch import nn
import torch
import numpy as np
def v_wrap(np_array, dtype=np.float32):
if np_array.dtype != dtype:
np_array = np_array.astype(dtype)
return torch.from_numpy(np_array)
def set_init(layers):
for layer in layers:
nn.init.normal_(layer.weight, mean=0., std=0.1)
nn.init.constant_(layer.bias, 0.)
def push_and_pull(opt, lnet, gnet, done, s_, bs, ba, br, gamma):
if done:
v_s_ = 0. # terminal
else:
v_s_ = lnet.forward(v_wrap(s_[None, :]))[-1].data.numpy()[0, 0]
buffer_v_target = []
for r in br[::-1]: # reverse buffer r
v_s_ = r + gamma * v_s_
buffer_v_target.append(v_s_)
buffer_v_target.reverse()
loss = lnet.loss_func(
v_wrap(np.vstack(bs)),
v_wrap(np.array(ba), dtype=np.int64) if ba[0].dtype == np.int64 else v_wrap(np.vstack(ba)),
v_wrap(np.array(buffer_v_target)[:, None]))
# calculate local gradients and push local parameters to global
opt.zero_grad()
loss.backward()
for lp, gp in zip(lnet.parameters(), gnet.parameters()):
gp._grad = lp.grad
opt.step()
# pull global parameters
lnet.load_state_dict(gnet.state_dict())
def save_model(gnet, dir, episode, reward):
if reward >= 9 and episode % 100 == 0:
torch.save(gnet.state_dict(), os.path.join(dir, f'model_{episode}.pth'))
def record(global_ep, global_ep_r, ep_r, res_queue, name, goal_word, action, action_number, winning_ep):
with global_ep.get_lock():
global_ep.value += 1
with global_ep_r.get_lock():
if global_ep_r.value == 0.:
global_ep_r.value = ep_r
else:
global_ep_r.value = global_ep_r.value * 0.99 + ep_r * 0.01
res_queue.put(global_ep_r.value)
if goal_word == action:
winning_ep.value += 1
if global_ep.value % 100 == 0:
print(
name,
"Ep:", global_ep.value,
"| Ep_r: %.0f" % global_ep_r.value,
"| Goal :", goal_word,
"| Action: ", action,
"| Actions: ", action_number
)
|