Arnas
committed on
Commit
·
5cb9176
1
Parent(s):
6411c52
Add QDQN Mountain Car agent trained for 600 episodes
Browse files- README.md +59 -0
- config.yaml +24 -0
- example.py +26 -0
- model/__init__.py +0 -0
- model/agent.py +48 -0
- model/qnn.py +82 -0
- qdqn-MountainCar-v0.pt +3 -0
README.md
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
tags:
|
3 |
+
- MountainCar-v0
|
4 |
+
- deep-reinforcement-learning
|
5 |
+
- reinforcement-learning
|
6 |
+
model-index:
|
7 |
+
- name: QDQN
|
8 |
+
results:
|
9 |
+
- task:
|
10 |
+
type: reinforcement-learning
|
11 |
+
name: reinforcement-learning
|
12 |
+
dataset:
|
13 |
+
name: MountainCar-v0
|
14 |
+
type: MountainCar-v0
|
15 |
+
metrics:
|
16 |
+
- type: mean_reward
|
17 |
+
value: -200.0 +/- 0.0
|
18 |
+
name: mean_reward
|
19 |
+
verified: false
|
20 |
+
---
|
21 |
+
|
22 |
+
# **QDQN** Agent playing **MountainCar-v0**
|
23 |
+
This is a trained model of a **QDQN** agent playing **MountainCar-v0**
|
24 |
+
using the [qrl-dqn-gym](https://github.com/qdevpsi3/qrl-dqn-gym) library.
|
25 |
+
|
26 |
+
This agent was trained as part of a [research project](https://github.com/agercas/QHack2023_QRL) during the QHack 2023
|
27 |
+
hackathon. The project explores the use of quantum algorithms in reinforcement learning.
|
28 |
+
More details about the project and the trained agent can be found in the [project repository](https://github.com/agercas/QHack2023_QRL).
|
29 |
+
|
30 |
+
|
31 |
+
## Usage
|
32 |
+
|
33 |
+
```python
|
34 |
+
import gym
|
35 |
+
import yaml
|
36 |
+
import torch
|
37 |
+
from model.qnn import QuantumNet
|
38 |
+
from model.agent import Agent
|
39 |
+
|
40 |
+
# Environment
|
41 |
+
env_name = 'MountainCar-v0'
|
42 |
+
env = gym.make(env_name)
|
43 |
+
|
44 |
+
# Network
|
45 |
+
with open('config.yaml', 'r') as f:
|
46 |
+
hparams = yaml.safe_load(f)
|
47 |
+
|
48 |
+
net = QuantumNet(
|
49 |
+
n_layers=hparams['n_layers'],
|
50 |
+
w_input=hparams['w_input'],
|
51 |
+
w_output=hparams['w_output'],
|
52 |
+
data_reupload=hparams['data_reupload']
|
53 |
+
)
|
54 |
+
state_dict = torch.load('qdqn-MountainCar-v0.pt', map_location=torch.device('cpu'))
|
55 |
+
net.load_state_dict(state_dict)
|
56 |
+
|
57 |
+
# Agent
|
58 |
+
agent = Agent(net)
|
59 |
+
```
|
config.yaml
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
batch_size: 16
|
2 |
+
data_reupload: true
|
3 |
+
device: auto
|
4 |
+
eps_decay: 0.99
|
5 |
+
eps_init: 1.0
|
6 |
+
eps_min: 0.01
|
7 |
+
gamma: 0.99
|
8 |
+
log_ckp_freq: 50
|
9 |
+
log_eval_freq: 20
|
10 |
+
log_train_freq: 1
|
11 |
+
logging: true
|
12 |
+
loss: SmoothL1
|
13 |
+
lr: 0.001
|
14 |
+
lr_input: 0.001
|
15 |
+
lr_output: 0.1
|
16 |
+
memory: 10000
|
17 |
+
n_eval_episodes: 5
|
18 |
+
n_layers: 5
|
19 |
+
optimizer: RMSprop
|
20 |
+
target_freq: 30
|
21 |
+
total_episodes: 5000
|
22 |
+
train_freq: 10
|
23 |
+
w_input: true
|
24 |
+
w_output: true
|
example.py
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gym
|
2 |
+
import torch
|
3 |
+
import yaml
|
4 |
+
|
5 |
+
from model.agent import Agent
|
6 |
+
from model.qnn import QuantumNet
|
7 |
+
|
8 |
+
# Environment
env_name = 'MountainCar-v0'
env = gym.make(env_name)

# Network: the hyper-parameters are read from the training configuration.
with open('config.yaml', 'r') as f:
    hparams = yaml.safe_load(f)

# Only the architecture-related entries are needed to rebuild the network.
net = QuantumNet(**{
    key: hparams[key]
    for key in ('n_layers', 'w_input', 'w_output', 'data_reupload')
})

# NOTE(review): torch.load unpickles the checkpoint file, so only load
# checkpoints that come from a trusted source.
state_dict = torch.load('qdqn-MountainCar-v0.pt', map_location=torch.device('cpu'))
net.load_state_dict(state_dict)

# Agent
agent = Agent(net)
|
model/__init__.py
ADDED
File without changes
|
model/agent.py
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import torch
|
3 |
+
|
4 |
+
|
5 |
+
class Agent:
    """Epsilon-greedy action selector built around a Q-value network.

    With probability ``epsilon`` the agent samples a random action from
    ``action_space``; otherwise it returns the index of the largest Q-value
    produced by ``net``.  ``epsilon`` starts at 0, so a freshly constructed
    agent always acts greedily until ``update_epsilon`` is called.

    Args:
        net: Callable module exposing ``eval()`` that maps a batched state
            tensor to Q-values with the actions along dimension 1.
        action_space: Object exposing ``sample()``; only needed when
            ``epsilon`` is greater than 0.
        exploration_initial_eps: Value of epsilon at step 0.
        exploration_decay: Multiplicative decay base applied per step.
        exploration_final_eps: Lower bound of epsilon.
    """

    def __init__(self,
                 net,
                 action_space=None,
                 exploration_initial_eps=None,
                 exploration_decay=None,
                 exploration_final_eps=None):
        self.net = net
        self.action_space = action_space
        self.exploration_initial_eps = exploration_initial_eps
        self.exploration_decay = exploration_decay
        self.exploration_final_eps = exploration_final_eps
        # Greedy by default; exploration starts once update_epsilon is called.
        self.epsilon = 0.

    def __call__(self, state, device=torch.device('cpu')):
        """Return an epsilon-greedy action for ``state``."""
        if np.random.random() < self.epsilon:
            return self.get_random_action()
        return self.get_action(state, device)

    def get_random_action(self):
        """Return an action sampled from ``action_space``."""
        return self.action_space.sample()

    def get_action(self, state, device=torch.device('cpu')):
        """Return the greedy action (argmax of the Q-values) as an ``int``.

        Args:
            state: Either an already batched ``torch.Tensor`` or a single
                unbatched observation (array-like); the latter gets a
                leading batch dimension of size 1.
            device: ``torch.device`` the state is moved to when it is not
                the CPU.
        """
        if not isinstance(state, torch.Tensor):
            # as_tensor + unsqueeze avoids the slow "list of ndarrays" path of
            # torch.tensor([state]) while yielding the same (1, ...) tensor.
            state = torch.as_tensor(state).unsqueeze(0)

        if device.type != 'cpu':
            # .to() works for any accelerator, not only CUDA devices.
            state = state.to(device)

        # Pure inference: no autograd graph is needed to pick an action.
        with torch.no_grad():
            q_values = self.net.eval()(state)
        _, action = torch.max(q_values, dim=1)
        return int(action.item())

    def update_epsilon(self, step):
        """Set epsilon to its exponentially decayed value at ``step``.

        Computes ``final + (initial - final) * decay ** step``, clamped from
        below by ``final``, stores it in ``self.epsilon`` and returns it.
        Raises ``TypeError`` if the exploration parameters were left as
        ``None``.
        """
        eps_span = self.exploration_initial_eps - self.exploration_final_eps
        decayed = self.exploration_final_eps + eps_span * self.exploration_decay**step
        self.epsilon = max(self.exploration_final_eps, decayed)
        return self.epsilon
|
model/qnn.py
ADDED
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pennylane as qml
|
2 |
+
import torch
|
3 |
+
import torch.nn as nn
|
4 |
+
from torch.nn.parameter import Parameter
|
5 |
+
|
6 |
+
|
7 |
+
def encode(n_qubits, inputs):
    """Angle-encode ``inputs`` with one RX rotation per qubit.

    Feature ``i`` (taken along the last axis of ``inputs``) becomes the RX
    angle on wire ``i``.  Indexing the last axis keeps the encoding correct
    both for a single 1-D sample and for a batched ``(batch, n_qubits)``
    tensor, where ``inputs[wire]`` would select a batch row instead of a
    feature.
    """
    for wire in range(n_qubits):
        qml.RX(inputs[..., wire], wires=wire)
|
10 |
+
|
11 |
+
|
12 |
+
def layer(n_qubits, y_weight, z_weight):
    """Apply one variational layer: RY rotations, RZ rotations, then CZ gates.

    Args:
        n_qubits: Number of wires the CZ ring is applied to.
        y_weight: Iterable of RY angles; entry ``i`` is applied to wire ``i``.
        z_weight: Iterable of RZ angles; entry ``i`` is applied to wire ``i``.
    """
    for wire, theta_y in enumerate(y_weight):
        qml.RY(theta_y, wires=wire)
    for wire, theta_z in enumerate(z_weight):
        qml.RZ(theta_z, wires=wire)
    # Ring of CZ gates: each wire is coupled to its successor (wrapping around).
    # NOTE(review): with n_qubits == 2 this emits CZ on [0, 1] and on [1, 0];
    # these look like they cancel each other -- confirm this is intended.
    for control in range(n_qubits):
        target = (control + 1) % n_qubits
        qml.CZ(wires=[control, target])
|
19 |
+
|
20 |
+
|
21 |
+
def measure(n_qubits):
    """Return the Pauli-Z expectation value of every wire, in wire order."""
    expectations = []
    for wire in range(n_qubits):
        expectations.append(qml.expval(qml.PauliZ(wire)))
    return expectations
|
23 |
+
|
24 |
+
|
25 |
+
def get_model(n_qubits, n_layers, data_reupload):
    """Build the variational circuit and wrap it as a ``qml.qnn.TorchLayer``.

    Args:
        n_qubits: Number of wires of the circuit.
        n_layers: Number of variational layers.
        data_reupload: If true, the inputs are re-encoded before every
            layer; otherwise they are encoded only before the first one.

    Returns:
        A ``TorchLayer`` with trainable ``y_weights`` and ``z_weights``, each
        of shape ``(n_layers, n_qubits)``.
    """
    # NOTE: need to select an appropriate device
    # dev = qml.device('lightning.gpu', wires=n_qubits)
    dev = qml.device("default.qubit", wires=n_qubits)

    per_layer_shape = (n_layers, n_qubits)
    weight_shapes = {"y_weights": per_layer_shape, "z_weights": per_layer_shape}

    @qml.qnode(dev, interface='torch')
    def circuit(inputs, y_weights, z_weights):
        for depth in range(n_layers):
            # The first layer always sees the data; later ones only when
            # data re-uploading is enabled.
            if data_reupload or depth == 0:
                encode(n_qubits, inputs)
            layer(n_qubits, y_weights[depth], z_weights[depth])
        return measure(n_qubits)

    return qml.qnn.TorchLayer(circuit, weight_shapes)
|
45 |
+
|
46 |
+
|
47 |
+
class QuantumNet(nn.Module):
    """Q-network made of a 2-qubit variational circuit and a linear head.

    The forward pass optionally scales the inputs by a trainable vector,
    squashes them with ``atan``, runs the quantum layers, rescales the
    resulting expectation values with ``(1 + x) / 2``, maps them to 3 action
    values with a linear layer and finally scales the outputs (by a trainable
    vector, or by the constant 90 when output weights are disabled).

    Args:
        n_layers: Number of variational layers of the circuit.
        w_input: Whether to learn a per-feature input scaling.
        w_output: Whether to learn a per-action output scaling.
        data_reupload: Forwarded to ``get_model``.
    """

    def __init__(self, n_layers, w_input, w_output, data_reupload):
        super().__init__()
        self.n_qubits = 2
        self.n_actions = 3
        self.data_reupload = data_reupload
        self.q_layers = get_model(
            n_qubits=self.n_qubits,
            n_layers=n_layers,
            data_reupload=data_reupload,
        )
        # convert from 2 qubits to 3 actions
        # not adding more complexity here because we want to learn through quantum circuit
        self.layer1 = nn.Linear(self.n_qubits, self.n_actions)

        if not w_input:
            self.register_parameter("w_input", None)
        else:
            self.w_input = Parameter(torch.empty(self.n_qubits))
            nn.init.normal_(self.w_input)

        if not w_output:
            self.register_parameter("w_output", None)
        else:
            self.w_output = Parameter(torch.empty(self.n_actions))
            # Output scale is initialised around 90, the same constant used
            # as the fixed scale when output weights are disabled.
            nn.init.normal_(self.w_output, mean=90.0)

    def forward(self, inputs):
        """Return the scaled action values for ``inputs``."""
        scaled = inputs if self.w_input is None else inputs * self.w_input
        angles = torch.atan(scaled)

        expectations = self.q_layers(angles)
        # Shift the circuit outputs by one and halve them before the head.
        rescaled = (1 + expectations) / 2

        action_values = self.layer1(rescaled)

        if self.w_output is None:
            return 90 * action_values
        return action_values * self.w_output
|
qdqn-MountainCar-v0.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af4b523003e5cc160264642e07f71c9353648347fc93ca3f58504969bccecae2
|
3 |
+
size 2231
|