"""Plot the Q-values a pickled policy network assigns to each price action.

Running this file loads ``price.pkl``, evaluates the loaded network on a
fixed sample state, prints the price with the highest Q-value and shows a
bar chart of Q-value versus price.
"""
import math
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from matplotlib import animation, rc

# matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*" and
# 3.8 removed the old aliases, so use whichever spelling this install has.
for _style_name in ('seaborn-white', 'seaborn-v0_8-white'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break

plt.rcParams.update({'pdf.fonttype': 'truetype'})

# SECURITY NOTE: unpickling executes arbitrary code embedded in the file.
# Only load 'price.pkl' when it comes from a trusted source.
with open('price.pkl', 'rb') as _model_file:
    pc2 = pickle.load(_model_file)

# State vector fed to the network when the caller does not supply one.
# NOTE(review): the meaning of the individual entries is not visible in this
# file -- confirm against the code that trained the network.
_DEFAULT_SAMPLE_STATE = [
    0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
    1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
]


def to_tensor(x):
    """Return *x* converted to a ``float32`` torch tensor.

    Args:
        x: Anything ``np.array`` accepts (list, tuple, ndarray, scalar).

    Returns:
        A ``torch.Tensor`` of dtype ``float32`` built from ``x``.
    """
    return torch.from_numpy(np.array(x).astype(np.float32))


def prediction(price_max, price_step, policy_net, sample_state=None):
    """Print the best price action for a state and plot all Q-values.

    The candidate prices are ``price_step, 2 * price_step, ...`` up to but
    excluding ``price_max``. ``policy_net`` is called once on the state and
    is expected to return one Q-value per candidate price.

    Args:
        price_max: Exclusive upper bound of the price grid.
        price_step: Spacing of the price grid (also its first value).
        policy_net: Callable mapping a ``float32`` state tensor to a tensor
            of Q-values.
        sample_state: Optional state to evaluate. Defaults to the fixed
            sample state used by the original script.

    Returns:
        None. The result is printed and shown as a matplotlib figure.
    """
    price_grid = np.arange(price_step, price_max, price_step)
    state = _DEFAULT_SAMPLE_STATE if sample_state is None else sample_state

    # Inference only: no gradients are needed for plotting.
    with torch.no_grad():
        q_values = policy_net(to_tensor(state))

    # max(0) returns (values, indices); convert the index tensor to a plain
    # int so it is an unambiguous numpy index.
    best_index = int(q_values.max(0)[1].detach())
    print(f'Optimal price action {price_grid[best_index]}')

    plt.figure(figsize=(16, 5))
    plt.xlabel("Price action ($)")
    plt.ylabel("Q ($)")
    # Scale the bar width with the grid spacing (6 for the default step of
    # 10) so bars neither overlap nor vanish for other step sizes.
    plt.bar(
        price_grid,
        q_values.detach().numpy(),
        color='crimson',
        width=0.6 * price_step,
        alpha=0.8,
    )
    plt.show()


prediction(500, 10, pc2)