File size: 8,325 Bytes
9fdc565 3a287f7 45d2337 94c96eb 9fdc565 3a287f7 9fdc565 45d2337 06aa953 fec75a6 06aa953 462fbcc 3a287f7 3b5b8d7 3a287f7 94c96eb 3a287f7 6a948bb 3a287f7 e6ae5b5 3a287f7 94c96eb da76220 94c96eb 42b0add 3b5b8d7 d39e519 3b5b8d7 94c96eb 3b5b8d7 94c96eb 3b5b8d7 3a287f7 3b5b8d7 c52326d 3b5b8d7 3a287f7 9962874 f743075 3a287f7 45d2337 4a5d9e1 45d2337 4a5d9e1 8b6e2d8 4a5d9e1 15ac963 4a5d9e1 15ac963 4a5d9e1 15ac963 4a5d9e1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 |
import streamlit as st
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import numpy as np
from abc import ABC, abstractmethod
# Page header: lecture title followed by the hero image.
st.title(
    "Homer Simpson Meta-Learning with Hierarchical Reinforcement Learning Intrinsic Reward Lecture"
)
st.image(
    "homer.webp",
    caption="Homer Simpson Meta-Learning HRL Lecture",
    use_column_width=True,
)

# Audio section: the first two clips, a download hint, then the third clip.
st.write("Audio Playback Meta-Learning with HRL Intrinsic Reward Lecture:")
for clip in ("h0.wav", "h1.wav"):
    st.audio(clip, format="audio/wav")
st.write("Oh, sweet Homer's doughnuts! If that second .wav file ain't playin', just download the darn thing! Mmm... downloads...")
st.audio("h2.wav", format="audio/wav")

# Static picture of the intrinsic-reward formula used by the examples below.
st.image("intrinsic_reward_formulation.png", caption='Intrinsic Reward Formulation')
# Load the table for grid cell (0, 0) and show its (rows, columns) shape.
st.write("Solving the first 5 equations @ (0,0):")
df_0_0 = pd.read_csv('df_0_0.csv')
st.write(df_0_0.shape)

# --- Example 1 (Method 1: inline arithmetic) ---
st.write("Example 1 via Method 1:")
# Parameters plugged into the formula below; N_st takes the place of N(s_t).
eta = 0.1
N_st = 1
epsilon = 1e-5
# Intrinsic reward: eta / sqrt(N_st + epsilon)
r_t_int = eta * (1 / (N_st + epsilon)**0.5)
# Render the formula with the parameters plugged in.  The "=" after
# r_{t}^{int} and the \times sign are required for the rendered equation to
# match the expression computed above.
st.latex(r"""
r_{t}^{int} = \eta \frac{1}{\sqrt{N(s_{t}) + \epsilon}} = 0.1 \frac{1}{\sqrt{1 + 1 \times 10^{-5}}}
""")
st.write(f"Calculated intrinsic reward: {r_t_int}")
st.write("Calculated intrinsic reward rounded 2 decimal places:", np.round(r_t_int, 2))
# Show the first row of the loaded table next to the computed value.
st.dataframe(df_0_0[:1])
# Example 2 heading and its formula with the parameters plugged in.  The "="
# after r_{t}^{int} and the \times sign make the rendered equation read as
# eta / sqrt(N(s_t) + epsilon) evaluated at N(s_t) = 2.
st.write("Example 2 via Method 2:")
st.latex(r"""
r_{t}^{int} = \eta \frac{1}{\sqrt{N(s_{t}) + \epsilon}} = 0.1 \frac{1}{\sqrt{2 + 1 \times 10^{-5}}}
""")
# Abstract Base Class for Intrinsic Reward Calculation
class IntrinsicRewardCalculator(ABC):
    """Interface for objects that compute an intrinsic reward."""

    @abstractmethod
    def calculate_intrinsic_reward(self, eta, count, epsilon):
        """Return the intrinsic reward for ``eta``, ``count`` and ``epsilon``."""


# Concrete Class for Intrinsic Reward Calculation
class ConcreteIntrinsicRewardCalculator(IntrinsicRewardCalculator):
    """Intrinsic reward computed as ``eta / sqrt(count + epsilon)``."""

    def calculate_intrinsic_reward(self, eta, count, epsilon):
        """Return ``eta * (1 / sqrt(count + epsilon))``.

        Args:
            eta: Multiplier applied to the inverse square root.
            count: Value added to ``epsilon`` under the square root.
            epsilon: Small offset added to ``count`` before the square root.

        Returns:
            The reward as computed by NumPy (``np.sqrt`` is used, so the
            result is a NumPy scalar for scalar inputs).
        """
        return eta * (1 / np.sqrt(count + epsilon))

    def populate_df_0_0(self, df_0_0, eta, count, epsilon, row=0):
        """Write the computed reward into the 'Intrinsic Reward' column.

        The DataFrame is modified in place and also returned.

        Args:
            df_0_0: DataFrame to update.
            eta: Forwarded to ``calculate_intrinsic_reward``.
            count: Forwarded to ``calculate_intrinsic_reward``.
            epsilon: Forwarded to ``calculate_intrinsic_reward``.
            row: Index label of the row to write.  Defaults to ``0``, which
                is the row this method has always written to.

        Returns:
            The same ``df_0_0`` object that was passed in.
        """
        intrinsic_reward = self.calculate_intrinsic_reward(eta, count, epsilon)
        # .at addresses a single cell by (row label, column label).
        df_0_0.at[row, 'Intrinsic Reward'] = intrinsic_reward
        return df_0_0
# Example 2 inputs, plus the grid coordinates shown in the output labels.
eta, count, epsilon = 0.1, 2, 1e-5
x, y = 0, 0

# Method 2: compute the reward through the calculator class.
irc = ConcreteIntrinsicRewardCalculator()
intrinsic_reward = irc.calculate_intrinsic_reward(eta, count, epsilon)
st.write(f"Intrinsic Reward @ {count} @ Coordinates {x,y}:", intrinsic_reward)
st.write(
    f"Intrinsic Reward @ {count} @ Coordinates {x,y} rounded 4 decimal places:",
    np.round(intrinsic_reward, 4),
)

# Store the computed reward in the table, then show the second row.
# NOTE(review): populate_df_0_0 is called without a row, and it writes to the
# row labelled 0, whereas the slice displayed here is row position 1 --
# confirm which row is meant to be updated.
df_0_0 = irc.populate_df_0_0(df_0_0, eta, count, epsilon)
st.dataframe(df_0_0[1:2])
# --- Example 3 (Method 1: inline arithmetic) ---
st.write("Example 3 via Method 1:")
# Example 3 parameters; N_st takes the place of N(s_t) in the formula.
eta = 0.1
N_st = 3
epsilon = 1e-5
# Intrinsic reward: eta / sqrt(N_st + epsilon)
r_t_int = eta * (1 / (N_st + epsilon)**0.5)
# Render the formula with the parameters plugged in.  The "=" after
# r_{t}^{int} and the \times sign are required for the rendered equation to
# match the expression computed above.
st.latex(r"""
r_{t}^{int} = \eta \frac{1}{\sqrt{N(s_{t}) + \epsilon}} = 0.1 \frac{1}{\sqrt{3 + 1 \times 10^{-5}}}
""")
st.write(f"Calculated intrinsic reward: {r_t_int}")
st.write("Calculated intrinsic reward rounded 4 decimal places:", np.round(r_t_int, 4))
# Show the third row of the loaded table next to the computed value.
st.dataframe(df_0_0[2:3])
# Example 4 heading and its formula with the parameters plugged in.  The "="
# after r_{t}^{int} and the \times sign make the rendered equation read as
# eta / sqrt(N(s_t) + epsilon) evaluated at N(s_t) = 4.
st.write("Example 4 via Method 2:")
st.latex(r"""
r_{t}^{int} = \eta \frac{1}{\sqrt{N(s_{t}) + \epsilon}} = 0.1 \frac{1}{\sqrt{4 + 1 \times 10^{-5}}}
""")
# Abstract Base Class for Intrinsic Reward Calculation
# NOTE: these two class names are also defined earlier in this script; the
# definitions below rebind them and are the ones in effect from here on.
class IntrinsicRewardCalculator(ABC):
    """Interface for objects that compute an intrinsic reward."""

    @abstractmethod
    def calculate_intrinsic_reward(self, eta, count, epsilon):
        """Return the intrinsic reward for ``eta``, ``count`` and ``epsilon``."""


# Concrete Class for Intrinsic Reward Calculation
class ConcreteIntrinsicRewardCalculator(IntrinsicRewardCalculator):
    """Intrinsic reward computed as ``eta / sqrt(count + epsilon)``."""

    def calculate_intrinsic_reward(self, eta, count, epsilon):
        """Return ``eta * (1 / sqrt(count + epsilon))``.

        Args:
            eta: Multiplier applied to the inverse square root.
            count: Value added to ``epsilon`` under the square root.
            epsilon: Small offset added to ``count`` before the square root.

        Returns:
            The reward as computed by NumPy (``np.sqrt`` is used, so the
            result is a NumPy scalar for scalar inputs).
        """
        return eta * (1 / np.sqrt(count + epsilon))

    def populate_df_0_0(self, df_0_0, eta, count, epsilon, row=0):
        """Write the computed reward into the 'Intrinsic Reward' column.

        The DataFrame is modified in place and also returned.

        Args:
            df_0_0: DataFrame to update.
            eta: Forwarded to ``calculate_intrinsic_reward``.
            count: Forwarded to ``calculate_intrinsic_reward``.
            epsilon: Forwarded to ``calculate_intrinsic_reward``.
            row: Index label of the row to write.  Defaults to ``0``, which
                is the row this method has always written to.

        Returns:
            The same ``df_0_0`` object that was passed in.
        """
        intrinsic_reward = self.calculate_intrinsic_reward(eta, count, epsilon)
        # .at addresses a single cell by (row label, column label).
        df_0_0.at[row, 'Intrinsic Reward'] = intrinsic_reward
        return df_0_0
# Example 4 inputs, plus the grid coordinates shown in the output labels.
eta, count, epsilon = 0.1, 4, 1e-5
x, y = 0, 0

# Method 2: compute the reward through the calculator class.
irc = ConcreteIntrinsicRewardCalculator()
intrinsic_reward = irc.calculate_intrinsic_reward(eta, count, epsilon)
st.write(f"Intrinsic Reward @ {count} @ Coordinates {x,y}:", intrinsic_reward)
st.write(
    f"Intrinsic Reward @ {count} @ Coordinates {x,y} rounded 2 decimal places:",
    np.round(intrinsic_reward, 2),
)

# Store the computed reward in the table, then show the fourth row.
# NOTE(review): populate_df_0_0 is called without a row, and it writes to the
# row labelled 0, whereas the slice displayed here is row position 3 --
# confirm which row is meant to be updated.
df_0_0 = irc.populate_df_0_0(df_0_0, eta, count, epsilon)
st.dataframe(df_0_0[3:4])
# --- Example 5 (Method 1: inline arithmetic) ---
st.write("Example 5 via Method 1:")
# Example 5 parameters; N_st takes the place of N(s_t) in the formula.
eta = 0.1
N_st = 5
epsilon = 1e-5
# Intrinsic reward: eta / sqrt(N_st + epsilon)
r_t_int = eta * (1 / (N_st + epsilon)**0.5)
# Render the formula with the parameters plugged in.  The "=" after
# r_{t}^{int} and the \times sign are required for the rendered equation to
# match the expression computed above.
st.latex(r"""
r_{t}^{int} = \eta \frac{1}{\sqrt{N(s_{t}) + \epsilon}} = 0.1 \frac{1}{\sqrt{5 + 1 \times 10^{-5}}}
""")
st.write(f"Calculated intrinsic reward: {r_t_int}")
st.write("Calculated intrinsic reward rounded 4 decimal places:", np.round(r_t_int, 4))
# Show the fifth row of the loaded table next to the computed value.
st.dataframe(df_0_0[4:5])
st.write("Oh, sweet Krusty-licious! At coordinates (0,0) for that plotly visualization, we need a whopping 7035 intrinsic reward calculations to get things rollin'! And don't forget to update those State Visitations. Those were just the first five. Mmm... 7030 more to go... D'oh!")
# Visit counts keyed by (row, column) grid cell.  Cell (5, 5) has no entry.
visitations = {
    (0, 0): 7035, (1, 0): 3579, (2, 0): 1359, (2, 1): 1707, (3, 1): 520, (4, 1): 227,
    (4, 2): 243, (5, 1): 217, (5, 2): 181, (5, 0): 241, (4, 0): 267, (5, 3): 179,
    (4, 3): 1034, (3, 3): 2163, (2, 3): 2080, (0, 1): 3313, (1, 1): 3015, (0, 2): 1846,
    (0, 3): 1104, (0, 4): 351, (1, 4): 518, (1, 3): 1497, (1, 2): 2236, (2, 2): 2239,
    (2, 4): 842, (1, 5): 238, (2, 5): 217, (0, 5): 341, (3, 5): 382, (4, 5): 1872,
    (4, 4): 2038, (3, 4): 1684, (3, 0): 383, (3, 2): 1102, (5, 4): 198,
}

# 6x6 grid of counts: start from zeros and scatter every count into its cell
# in one indexed assignment (first key component = row, second = column).
grid = np.zeros((6, 6))
cell_rows, cell_cols = zip(*visitations)
grid[cell_rows, cell_cols] = list(visitations.values())

# Share of all visits that each recorded cell received, in percent.
total_visitations = sum(visitations.values())
percentages = {
    cell: (hits / total_visitations) * 100
    for cell, hits in visitations.items()
}

# Row-major listing of every cell, (0, 0) through (5, 5), used as the
# display order for the percentages.
order = [(row, col) for row in range(6) for col in range(6)]
st.title("State Visitations Visualization")

# One line per grid cell in `order`; cells without a recorded count show 0.00%.
st.write("### State Visitations Percentages:")
for state in order:
    share = percentages.get(state, 0)
    st.write(f"State {state}: {share:.2f}%")

# Pie chart: one slice per recorded cell, sized by its raw visit count.
labels = [f"State {state}" for state in visitations]
values = list(visitations.values())
pie_trace = go.Pie(labels=labels, values=values)
fig_pie = go.Figure(data=[pie_trace])
fig_pie.update_layout(title_text="State Visitations Pie Chart")
st.plotly_chart(fig_pie)

# Heatmap of the 6x6 count grid, with the x axis drawn along the top edge.
fig_heatmap = px.imshow(
    grid,
    labels={"x": "Column", "y": "Row", "color": "Visitations"},
    x=list(range(6)),
    y=list(range(6)),
    title="State Visitations Heatmap",
)
fig_heatmap.update_xaxes(side="top")
st.plotly_chart(fig_heatmap)
# Load the CSV data
df = pd.read_csv('goal_rows.csv')

# Count how many rows fall on each State_2D value.  Naming the index and the
# count column explicitly gives the same two columns regardless of how the
# installed pandas labels the output of value_counts().
visitation_counts = (
    df['State_2D']
    .value_counts()
    .rename_axis('State_2D')
    .reset_index(name='Visitation_Count')
)

# Create the Plotly bar chart
fig = px.bar(
    visitation_counts,
    x='State_2D',
    y='Visitation_Count',
    title='Goal Position Visitation Counts',
    labels={'State_2D': 'State 2D', 'Visitation_Count': 'Visitation Count'},
)

# Display the plot using Streamlit (the stray trailing "|" that followed this
# call was not valid Python and has been dropped).
st.title('Goal Position Visitation Counts Visualization')
st.plotly_chart(fig)