Updated lines 209-220 with: # Aggregate the data to count the number of visits to each State_2D visitation_counts = df['State_2D'].value_counts().reset_index() visitation_counts.columns = ['State_2D', 'Visitation_Count'] # Create the Plotly bar chart fig = px.bar(visitation_counts, x='State_2D', y='Visitation_Count', title='Goal Position Visitation Counts', labels={'State_2D': 'State 2D', 'Visitation_Count': 'Visitation Count'}) # Display the plot using Streamlit st.title('Goal Position Visitation Counts Visualization') st.plotly_chart(fig)
15ac963
verified
import streamlit as st | |
import pandas as pd | |
import plotly.graph_objects as go | |
import plotly.express as px | |
import numpy as np | |
from abc import ABC, abstractmethod | |
# Page header: lecture title followed by the banner image.
st.title("Homer Simpson Meta-Learning with Hierarchical Reinforcement Learning Intrinsic Reward Lecture")
st.image(
    "homer.webp",
    caption="Homer Simpson Meta-Learning HRL Lecture",
    use_column_width=True,
)

# Lecture audio. The download hint is written between the second and third
# players, so the first two clips are emitted before it and the last one after.
st.write("Audio Playback Meta-Learning with HRL Intrinsic Reward Lecture:")
for _clip in ("h0.wav", "h1.wav"):
    st.audio(_clip, format="audio/wav")
st.write("Oh, sweet Homer's doughnuts! If that second .wav file ain't playin', just download the darn thing! Mmm... downloads...")
st.audio("h2.wav", format="audio/wav")

# Formula image shown ahead of the worked examples.
st.image("intrinsic_reward_formulation.png", caption="Intrinsic Reward Formulation")

# Load the table for state (0,0) and show its (rows, columns) shape.
st.write("Solving the first 5 equations @ (0,0):")
df_0_0 = pd.read_csv("df_0_0.csv")
st.write(df_0_0.shape)
st.write("Example 1 via Method 1:")

# Example 1 parameters: scaling factor eta, visitation count N(s_t), and the
# small constant epsilon added to the count under the square root.
eta = 0.1
N_st = 1
epsilon = 1e-5

# Intrinsic reward: r_int = eta * 1 / sqrt(N(s_t) + epsilon)
r_t_int = eta * (1 / (N_st + epsilon)**0.5)

# Render the formula with the parameters plugged in. The "=" after
# r_t^{int} and the \times sign are required for the equation to read
# correctly once rendered.
st.latex(r"""
r_{t}^{int} = \eta \frac{1}{\sqrt{N(s_{t}) + \epsilon}} = 0.1 \frac{1}{\sqrt{1 + 1 \times 10^{-5}}}
""")

st.write(f"Calculated intrinsic reward: {r_t_int}")
st.write("Calculated intrinsic reward rounded 2 decimal places:", np.round(r_t_int,2))

# First row of the (0,0) table, shown next to the value computed above.
st.dataframe(df_0_0[:1])
st.write("Example 2 via Method 2:")

# Render the formula with the Example 2 parameters plugged in (count = 2).
# The "=" after r_t^{int} and the \times sign are required for the equation
# to read correctly once rendered.
st.latex(r"""
r_{t}^{int} = \eta \frac{1}{\sqrt{N(s_{t}) + \epsilon}} = 0.1 \frac{1}{\sqrt{2 + 1 \times 10^{-5}}}
""")
# Abstract Base Class for Intrinsic Reward Calculation
class IntrinsicRewardCalculator(ABC):
    """Interface for objects that turn a visitation count into a reward."""

    @abstractmethod
    def calculate_intrinsic_reward(self, eta, count, epsilon):
        """Return the intrinsic reward for a state visited ``count`` times.

        Subclasses must override this method; the base class cannot be
        instantiated.
        """


# Concrete Class for Intrinsic Reward Calculation
class ConcreteIntrinsicRewardCalculator(IntrinsicRewardCalculator):
    """Count-based intrinsic reward: ``eta * 1 / sqrt(count + epsilon)``."""

    def calculate_intrinsic_reward(self, eta, count, epsilon):
        """Compute ``eta * (1 / sqrt(count + epsilon))``.

        Args:
            eta: Scaling factor applied to the reward.
            count: Visitation count of the state.
            epsilon: Constant added to ``count`` under the square root.

        Returns:
            The intrinsic reward as computed by NumPy.
        """
        return eta * (1 / np.sqrt(count + epsilon))

    def populate_df_0_0(self, df_0_0, eta, count, epsilon, row=None):
        """Store the reward for ``count`` in the 'Intrinsic Reward' column.

        Args:
            df_0_0: DataFrame to update in place.
            eta: Scaling factor applied to the reward.
            count: Visitation count of the state.
            epsilon: Constant added to ``count`` under the square root.
            row: Index label of the row to write. When omitted, the row is
                ``count - 1`` (never below 0).

        Returns:
            The same DataFrame object that was passed in.
        """
        if row is None:
            # NOTE(review): assumes row i holds visitation count i + 1, which
            # is how this file displays the rows (count 2 -> df_0_0[1:2],
            # count 4 -> df_0_0[3:4]) -- confirm against df_0_0.csv.
            # Writing every result to row 0 would overwrite the count-1 row
            # and leave the displayed row untouched.
            row = max(int(count) - 1, 0)
        df_0_0.at[row, 'Intrinsic Reward'] = self.calculate_intrinsic_reward(eta, count, epsilon)
        return df_0_0
# Example 2: scaling factor, visitation count and epsilon, evaluated for the
# state at grid coordinates (0, 0).
eta, count, epsilon = 0.1, 2, 1e-5
x = y = 0

# Compute the reward through the calculator object (Method 2).
irc = ConcreteIntrinsicRewardCalculator()
intrinsic_reward = irc.calculate_intrinsic_reward(eta, count, epsilon)

# Show the raw value and the value rounded to 4 decimals.
st.write(f"Intrinsic Reward @ {count} @ Coordinates {x,y}:", intrinsic_reward)
st.write(
    f"Intrinsic Reward @ {count} @ Coordinates {x,y} rounded 4 decimal places:",
    np.round(intrinsic_reward,4),
)

# Store the reward in the 'Intrinsic Reward' column of the table, then show
# the table's second row.
df_0_0 = irc.populate_df_0_0(df_0_0, eta, count, epsilon)
st.dataframe(df_0_0[1:2])
st.write("Example 3 via Method 1:")

# Example 3 parameters: scaling factor eta, visitation count N(s_t), and the
# small constant epsilon added to the count under the square root.
eta = 0.1
N_st = 3
epsilon = 1e-5

# Intrinsic reward: r_int = eta * 1 / sqrt(N(s_t) + epsilon)
r_t_int = eta * (1 / (N_st + epsilon)**0.5)

# Render the formula with the parameters plugged in. The "=" after
# r_t^{int} and the \times sign are required for the equation to read
# correctly once rendered.
st.latex(r"""
r_{t}^{int} = \eta \frac{1}{\sqrt{N(s_{t}) + \epsilon}} = 0.1 \frac{1}{\sqrt{3 + 1 \times 10^{-5}}}
""")

st.write(f"Calculated intrinsic reward: {r_t_int}")
st.write("Calculated intrinsic reward rounded 4 decimal places:", np.round(r_t_int,4))

# Third row of the (0,0) table, shown next to the value computed above.
st.dataframe(df_0_0[2:3])
st.write("Example 4 via Method 2:")

# Render the formula with the Example 4 parameters plugged in (count = 4).
# The "=" after r_t^{int} and the \times sign are required for the equation
# to read correctly once rendered.
st.latex(r"""
r_{t}^{int} = \eta \frac{1}{\sqrt{N(s_{t}) + \epsilon}} = 0.1 \frac{1}{\sqrt{4 + 1 \times 10^{-5}}}
""")
# NOTE(review): classes with these same names are also declared earlier in
# this file (ahead of Example 2); this declaration rebinds both names.
# Consider keeping a single definition.

# Abstract Base Class for Intrinsic Reward Calculation
class IntrinsicRewardCalculator(ABC):
    """Interface for objects that turn a visitation count into a reward."""

    @abstractmethod
    def calculate_intrinsic_reward(self, eta, count, epsilon):
        """Return the intrinsic reward for a state visited ``count`` times.

        Subclasses must override this method; the base class cannot be
        instantiated.
        """


# Concrete Class for Intrinsic Reward Calculation
class ConcreteIntrinsicRewardCalculator(IntrinsicRewardCalculator):
    """Count-based intrinsic reward: ``eta * 1 / sqrt(count + epsilon)``."""

    def calculate_intrinsic_reward(self, eta, count, epsilon):
        """Compute ``eta * (1 / sqrt(count + epsilon))``.

        Args:
            eta: Scaling factor applied to the reward.
            count: Visitation count of the state.
            epsilon: Constant added to ``count`` under the square root.

        Returns:
            The intrinsic reward as computed by NumPy.
        """
        return eta * (1 / np.sqrt(count + epsilon))

    def populate_df_0_0(self, df_0_0, eta, count, epsilon, row=None):
        """Store the reward for ``count`` in the 'Intrinsic Reward' column.

        Args:
            df_0_0: DataFrame to update in place.
            eta: Scaling factor applied to the reward.
            count: Visitation count of the state.
            epsilon: Constant added to ``count`` under the square root.
            row: Index label of the row to write. When omitted, the row is
                ``count - 1`` (never below 0).

        Returns:
            The same DataFrame object that was passed in.
        """
        if row is None:
            # NOTE(review): assumes row i holds visitation count i + 1, which
            # is how this file displays the rows (count 2 -> df_0_0[1:2],
            # count 4 -> df_0_0[3:4]) -- confirm against df_0_0.csv.
            # Writing every result to row 0 would overwrite the count-1 row
            # and leave the displayed row untouched.
            row = max(int(count) - 1, 0)
        df_0_0.at[row, 'Intrinsic Reward'] = self.calculate_intrinsic_reward(eta, count, epsilon)
        return df_0_0
# Example 4: scaling factor, visitation count and epsilon, evaluated for the
# state at grid coordinates (0, 0).
eta, count, epsilon = 0.1, 4, 1e-5
x = y = 0

# Compute the reward through the calculator object (Method 2).
irc = ConcreteIntrinsicRewardCalculator()
intrinsic_reward = irc.calculate_intrinsic_reward(eta, count, epsilon)

# Show the raw value and the value rounded to 2 decimals.
st.write(f"Intrinsic Reward @ {count} @ Coordinates {x,y}:", intrinsic_reward)
st.write(
    f"Intrinsic Reward @ {count} @ Coordinates {x,y} rounded 2 decimal places:",
    np.round(intrinsic_reward,2),
)

# Store the reward in the 'Intrinsic Reward' column of the table, then show
# the table's fourth row.
df_0_0 = irc.populate_df_0_0(df_0_0, eta, count, epsilon)
st.dataframe(df_0_0[3:4])
st.write("Example 5 via Method 1:")

# Example 5 parameters: scaling factor eta, visitation count N(s_t), and the
# small constant epsilon added to the count under the square root.
eta = 0.1
N_st = 5
epsilon = 1e-5

# Intrinsic reward: r_int = eta * 1 / sqrt(N(s_t) + epsilon)
r_t_int = eta * (1 / (N_st + epsilon)**0.5)

# Render the formula with the parameters plugged in. The "=" after
# r_t^{int} and the \times sign are required for the equation to read
# correctly once rendered.
st.latex(r"""
r_{t}^{int} = \eta \frac{1}{\sqrt{N(s_{t}) + \epsilon}} = 0.1 \frac{1}{\sqrt{5 + 1 \times 10^{-5}}}
""")

st.write(f"Calculated intrinsic reward: {r_t_int}")
st.write("Calculated intrinsic reward rounded 4 decimal places:", np.round(r_t_int,4))

# Fifth row of the (0,0) table, shown next to the value computed above.
st.dataframe(df_0_0[4:5])

# Closing remark for the worked examples.
st.write("Oh, sweet Krusty-licious! At coordinates (0,0) for that plotly visualization, we need a whopping 7035 intrinsic reward calculations to get things rollin'! And don't forget to update those State Visitations. Those were just the first five. Mmm... 7030 more to go... D'oh!")
# Visit counts keyed by (row, column) on the 6x6 grid. The dict order is the
# slice order handed to the pie chart below. State (5, 5) has no entry.
visitations = {
    (0, 0): 7035, (1, 0): 3579, (2, 0): 1359, (2, 1): 1707, (3, 1): 520, (4, 1): 227,
    (4, 2): 243, (5, 1): 217, (5, 2): 181, (5, 0): 241, (4, 0): 267, (5, 3): 179,
    (4, 3): 1034, (3, 3): 2163, (2, 3): 2080, (0, 1): 3313, (1, 1): 3015, (0, 2): 1846,
    (0, 3): 1104, (0, 4): 351, (1, 4): 518, (1, 3): 1497, (1, 2): 2236, (2, 2): 2239,
    (2, 4): 842, (1, 5): 238, (2, 5): 217, (0, 5): 341, (3, 5): 382, (4, 5): 1872,
    (4, 4): 2038, (3, 4): 1684, (3, 0): 383, (3, 2): 1102, (5, 4): 198,
}

# Dense 6x6 array holding the same counts; cells without an entry stay 0.
grid = np.zeros((6, 6))
for (x, y), count in visitations.items():
    grid[x, y] = count

# Share of all recorded visits that each state accounts for, in percent.
total_visitations = sum(visitations.values())
percentages = {
    state: (count / total_visitations) * 100
    for state, count in visitations.items()
}

# Every grid cell in row-major order: (0, 0), (0, 1), ..., (5, 5).
order = [(row, col) for row in range(6) for col in range(6)]

st.title("State Visitations Visualization")
st.write("### State Visitations Percentages:")
for state in order:
    # Cells missing from `visitations` are reported as 0.00%.
    st.write(f"State {state}: {percentages.get(state, 0):.2f}%")

# Pie chart with one slice per state present in `visitations`.
labels = [f"State {state}" for state in visitations]
values = list(visitations.values())
fig_pie = go.Figure(data=[go.Pie(labels=labels, values=values)])
fig_pie.update_layout(title_text="State Visitations Pie Chart")
st.plotly_chart(fig_pie)

# Heatmap of the grid array, with the x axis drawn along the top edge.
fig_heatmap = px.imshow(
    grid,
    labels={"x": "Column", "y": "Row", "color": "Visitations"},
    x=list(range(6)),
    y=list(range(6)),
    title="State Visitations Heatmap",
)
fig_heatmap.update_xaxes(side="top")
st.plotly_chart(fig_heatmap)
# Read the goal rows; only the 'State_2D' column is used below.
df = pd.read_csv('goal_rows.csv')

# Count how often each State_2D value occurs and shape the result into a
# two-column frame: the state and its number of occurrences.
visitation_counts = (
    df['State_2D']
    .value_counts()
    .rename_axis('State_2D')
    .reset_index(name='Visitation_Count')
)

# Bar chart with one bar per state, using human-readable axis labels.
fig = px.bar(
    visitation_counts,
    x='State_2D',
    y='Visitation_Count',
    title='Goal Position Visitation Counts',
    labels={'State_2D': 'State 2D', 'Visitation_Count': 'Visitation Count'},
)

# Section title followed by the chart.
st.title('Goal Position Visitation Counts Visualization')
st.plotly_chart(fig)