File size: 8,325 Bytes
9fdc565
3a287f7
45d2337
 
 
94c96eb
9fdc565
3a287f7
9fdc565
 
 
 
 
 
 
 
45d2337
06aa953
fec75a6
06aa953
462fbcc
3a287f7
 
 
 
 
3b5b8d7
3a287f7
 
 
 
 
 
94c96eb
3a287f7
6a948bb
3a287f7
 
e6ae5b5
3a287f7
 
 
94c96eb
 
 
 
 
 
 
 
da76220
 
94c96eb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42b0add
3b5b8d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d39e519
3b5b8d7
94c96eb
 
3b5b8d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94c96eb
 
 
3b5b8d7
3a287f7
3b5b8d7
 
 
 
 
 
 
 
 
 
 
 
c52326d
3b5b8d7
3a287f7
9962874
f743075
3a287f7
45d2337
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4a5d9e1
45d2337
4a5d9e1
 
 
8b6e2d8
4a5d9e1
15ac963
 
 
4a5d9e1
 
15ac963
4a5d9e1
15ac963
4a5d9e1
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
import streamlit as st
import pandas as pd 
import plotly.graph_objects as go
import plotly.express as px
import numpy as np
from abc import ABC, abstractmethod


# --- Page header and lecture media -----------------------------------------
# Set the title of the app
st.title("Homer Simpson Meta-Learning with Hierarchical Reinforcement Learning Intrinsic Reward Lecture")

# Display the image with a caption
# NOTE(review): recent Streamlit releases deprecate `use_column_width` on
# st.image -- confirm against the Streamlit version this app is pinned to.
st.image("homer.webp", caption="Homer Simpson Meta-Learning HRL Lecture", use_column_width=True)

# Display and play the audio files
st.write("Audio Playback Meta-Learning with HRL Intrinsic Reward Lecture:")
st.audio("h0.wav", format="audio/wav")
st.audio("h1.wav", format="audio/wav")
st.write("Oh, sweet Homer's doughnuts! If that second .wav file ain't playin', just download the darn thing! Mmm... downloads...")
st.audio("h2.wav", format="audio/wav")
st.image("intrinsic_reward_formulation.png", caption='Intrinsic Reward Formulation')
st.write("Solving the first 5 equations @ (0,0):")

# Table that the examples below display one row at a time.
df_0_0 = pd.read_csv('df_0_0.csv')
st.write(df_0_0.shape)

# --- Example 1: direct evaluation of the formula ----------------------------
st.write("Example 1 via Method 1:")
# Define parameters
eta = 0.1       # scaling factor
N_st = 1        # visitation count N(s_t)
epsilon = 1e-5  # keeps the denominator non-zero
# Intrinsic reward formulation: eta / sqrt(N(s_t) + epsilon)
r_t_int = eta * (1 / (N_st + epsilon)**0.5)
# Display the formulation with parameters plugged in.
# The equation needs "=" after r_t^{int} (otherwise it renders as a product
# of r and eta) and \times instead of a literal "x" for the multiplication.
st.latex(r"""
r_{t}^{int} = \eta \frac{1}{\sqrt{N(s_{t}) + \epsilon}} = 0.1 \frac{1}{\sqrt{1 + 1 \times 10^{-5}}}
""")
st.write(f"Calculated intrinsic reward: {r_t_int}")
st.write("Calculated intrinsic reward rounded 2 decimal places:", np.round(r_t_int, 2))
# First row of the table (positional slice).
st.dataframe(df_0_0.iloc[:1])


# --- Example 2: header and formula (computed further below) -----------------
# Display the formulation with parameters plugged in
st.write("Example 2 via Method 2:")
st.latex(r"""
r_{t}^{int} = \eta \frac{1}{\sqrt{N(s_{t}) + \epsilon}} = 0.1 \frac{1}{\sqrt{2 + 1 \times 10^{-5}}}
""")

# Abstract Base Class for Intrinsic Reward Calculation
class IntrinsicRewardCalculator(ABC):
    """Interface for objects that compute a count-based intrinsic reward."""

    @abstractmethod
    def calculate_intrinsic_reward(self, eta: float, count: float, epsilon: float) -> float:
        """Return the intrinsic reward for the given parameters.

        Args:
            eta: Scaling factor applied to the reward.
            count: Visitation count of the state.
            epsilon: Constant added to ``count`` inside the square root.
        """


# Concrete Class for Intrinsic Reward Calculation
class ConcreteIntrinsicRewardCalculator(IntrinsicRewardCalculator):
    """Computes ``eta / sqrt(count + epsilon)`` and can store it in a DataFrame."""

    def calculate_intrinsic_reward(self, eta: float, count: float, epsilon: float) -> float:
        """Return ``eta * (1 / sqrt(count + epsilon))``.

        Args:
            eta: Scaling factor applied to the reward.
            count: Visitation count of the state.
            epsilon: Constant added to ``count`` inside the square root.

        Returns:
            The intrinsic reward as computed by NumPy.
        """
        return eta * (1 / np.sqrt(count + epsilon))

    def populate_df_0_0(self, df_0_0: pd.DataFrame, eta: float, count: float,
                        epsilon: float, row=0) -> pd.DataFrame:
        """Write the intrinsic reward into the ``'Intrinsic Reward'`` column.

        The DataFrame is modified in place and also returned.

        Args:
            df_0_0: Table to update.
            eta: Scaling factor applied to the reward.
            count: Visitation count of the state.
            epsilon: Constant added to ``count`` inside the square root.
            row: Index label of the row to update. Defaults to ``0``, which
                is the row this method always wrote to before the parameter
                existed, so existing callers are unaffected.

        Returns:
            The same ``df_0_0`` object that was passed in.
        """
        intrinsic_reward = self.calculate_intrinsic_reward(eta, count, epsilon)
        df_0_0.at[row, 'Intrinsic Reward'] = intrinsic_reward
        return df_0_0

# --- Example 2: evaluation through the calculator class ---------------------
# Example 2 parameters
eta = 0.1       # scaling factor
count = 2       # visitation count N(s_t)
epsilon = 1e-5  # keeps the denominator non-zero
x,y = 0,0       # grid coordinates shown in the labels below

# Create instance for Intrinsic Reward Calculation
irc = ConcreteIntrinsicRewardCalculator()
# Pass the parameters declared above rather than repeating them as literals,
# so the displayed value and the stored value cannot drift apart.
intrinsic_reward = irc.calculate_intrinsic_reward(eta, count, epsilon)
st.write(f"Intrinsic Reward @ {count} @ Coordinates {x,y}:", intrinsic_reward)
st.write(f"Intrinsic Reward @ {count} @ Coordinates {x,y} rounded 4 decimal places:", np.round(intrinsic_reward,4))
# Populate the DataFrame with the calculated intrinsic reward
# NOTE(review): populate_df_0_0 writes to row label 0, while the row displayed
# just below is positional row 1 -- confirm which row Example 2 should update.
df_0_0 = irc.populate_df_0_0(df_0_0, eta, count, epsilon)
# Display the updated DataFrame
st.dataframe(df_0_0[1:2])

# --- Example 3: direct evaluation of the formula ----------------------------
st.write("Example 3 via Method 1:")
# Example 3 parameters
eta = 0.1
N_st = 3
epsilon = 1e-5
# Intrinsic reward formulation: eta / sqrt(N(s_t) + epsilon)
r_t_int = eta * (1 / (N_st + epsilon)**0.5)
# Display the formulation with parameters plugged in.
# "=" after r_t^{int} and \times (not a literal "x") make the equation render
# as the definition that the code above evaluates.
st.latex(r"""
r_{t}^{int} = \eta \frac{1}{\sqrt{N(s_{t}) + \epsilon}} = 0.1 \frac{1}{\sqrt{3 + 1 \times 10^{-5}}}
""")
st.write(f"Calculated intrinsic reward: {r_t_int}")
st.write("Calculated intrinsic reward rounded 4 decimal places:", np.round(r_t_int, 4))
st.dataframe(df_0_0[2:3])



# --- Example 4: header and formula (computed further below) -----------------
# Display the formulation with parameters plugged in
st.write("Example 4 via Method 2:")
st.latex(r"""
r_{t}^{int} = \eta \frac{1}{\sqrt{N(s_{t}) + \epsilon}} = 0.1 \frac{1}{\sqrt{4 + 1 \times 10^{-5}}}
""")

# Abstract Base Class for Intrinsic Reward Calculation
# NOTE(review): this re-declares the two classes already defined earlier in
# this script with the same bodies; the repetition looks redundant and is a
# candidate for removal once confirmed nothing relies on the rebinding.
class IntrinsicRewardCalculator(ABC):
    """Abstract base for count-based intrinsic reward calculators."""

    @abstractmethod
    def calculate_intrinsic_reward(self, eta: float, count: float, epsilon: float) -> float:
        """Return the intrinsic reward for the given parameters.

        Args:
            eta: Scaling factor applied to the reward.
            count: Visitation count of the state.
            epsilon: Constant added to ``count`` inside the square root.
        """


# Concrete Class for Intrinsic Reward Calculation
class ConcreteIntrinsicRewardCalculator(IntrinsicRewardCalculator):
    """Evaluates ``eta / sqrt(count + epsilon)`` and can record it in a table."""

    def calculate_intrinsic_reward(self, eta: float, count: float, epsilon: float) -> float:
        """Return ``eta * (1 / sqrt(count + epsilon))`` computed with NumPy."""
        return eta * (1 / np.sqrt(count + epsilon))

    def populate_df_0_0(self, df_0_0: pd.DataFrame, eta: float, count: float,
                        epsilon: float, row=0) -> pd.DataFrame:
        """Store the intrinsic reward in the ``'Intrinsic Reward'`` column.

        The DataFrame is updated in place and returned for convenience.

        Args:
            df_0_0: Table to update.
            eta: Scaling factor applied to the reward.
            count: Visitation count of the state.
            epsilon: Constant added to ``count`` inside the square root.
            row: Index label of the row to write to. The default ``0`` keeps
                the previous hard-coded behaviour for existing callers.

        Returns:
            The same ``df_0_0`` object that was passed in.
        """
        intrinsic_reward = self.calculate_intrinsic_reward(eta, count, epsilon)
        df_0_0.at[row, 'Intrinsic Reward'] = intrinsic_reward
        return df_0_0

# --- Example 4: evaluation through the calculator class ---------------------
# Example 4 parameters
eta = 0.1       # scaling factor
count = 4       # visitation count N(s_t)
epsilon = 1e-5  # keeps the denominator non-zero
x,y = 0,0       # grid coordinates shown in the labels below

# Create instance for Intrinsic Reward Calculation
irc = ConcreteIntrinsicRewardCalculator()
# Pass the parameters declared above rather than repeating them as literals,
# so the displayed value and the stored value cannot drift apart.
intrinsic_reward = irc.calculate_intrinsic_reward(eta, count, epsilon)
st.write(f"Intrinsic Reward @ {count} @ Coordinates {x,y}:", intrinsic_reward)
st.write(f"Intrinsic Reward @ {count} @ Coordinates {x,y} rounded 2 decimal places:", np.round(intrinsic_reward,2))
# Populate the DataFrame with the calculated intrinsic reward
# NOTE(review): populate_df_0_0 writes to row label 0, while the row displayed
# just below is positional row 3 -- confirm which row Example 4 should update.
df_0_0 = irc.populate_df_0_0(df_0_0, eta, count, epsilon)
# Display the updated DataFrame
st.dataframe(df_0_0[3:4])

# --- Example 5: direct evaluation of the formula ----------------------------
st.write("Example 5 via Method 1:")
# Example 5 parameters
eta = 0.1
N_st = 5
epsilon = 1e-5
# Intrinsic reward formulation: eta / sqrt(N(s_t) + epsilon)
r_t_int = eta * (1 / (N_st + epsilon)**0.5)
# Display the formulation with parameters plugged in.
# "=" after r_t^{int} and \times (not a literal "x") make the equation render
# as the definition that the code above evaluates.
st.latex(r"""
r_{t}^{int} = \eta \frac{1}{\sqrt{N(s_{t}) + \epsilon}} = 0.1 \frac{1}{\sqrt{5 + 1 \times 10^{-5}}}
""")
st.write(f"Calculated intrinsic reward: {r_t_int}")
st.write("Calculated intrinsic reward rounded 4 decimal places:", np.round(r_t_int, 4))
st.dataframe(df_0_0[4:5])

st.write("Oh, sweet Krusty-licious! At coordinates (0,0) for that plotly visualization, we need a whopping 7035 intrinsic reward calculations to get things rollin'! And don't forget to update those State Visitations. Those were just the first five. Mmm... 7030 more to go... D'oh!")



# Visit counts keyed by (row, column) grid state
visitations = {
    (0, 0): 7035, (1, 0): 3579, (2, 0): 1359, (2, 1): 1707, (3, 1): 520, (4, 1): 227,
    (4, 2): 243, (5, 1): 217, (5, 2): 181, (5, 0): 241, (4, 0): 267, (5, 3): 179,
    (4, 3): 1034, (3, 3): 2163, (2, 3): 2080, (0, 1): 3313, (1, 1): 3015, (0, 2): 1846,
    (0, 3): 1104, (0, 4): 351, (1, 4): 518, (1, 3): 1497, (1, 2): 2236, (2, 2): 2239,
    (2, 4): 842, (1, 5): 238, (2, 5): 217, (0, 5): 341, (3, 5): 382, (4, 5): 1872,
    (4, 4): 2038, (3, 4): 1684, (3, 0): 383, (3, 2): 1102, (5, 4): 198
}

# 6x6 array of the counts; states missing from the mapping keep their 0
grid = np.zeros((6, 6))
visited_rows, visited_cols = zip(*visitations)
grid[list(visited_rows), list(visited_cols)] = list(visitations.values())

# Share of all visits held by each state, in percent
total_visitations = sum(visitations.values())
percentages = {
    cell: (visits / total_visitations) * 100
    for cell, visits in visitations.items()
}

# Row-major listing of every cell of the 6x6 grid
order = [(row, col) for row in range(6) for col in range(6)]

st.title("State Visitations Visualization")

st.write("### State Visitations Percentages:")
for state in order:
    # Cells that were never visited are reported as 0.00%
    share = percentages.get(state, 0)
    st.write(f"State {state}: {share:.2f}%")

# Pie chart: one slice per visited state, in the mapping's own order
labels = [f"State {cell}" for cell in visitations]
values = list(visitations.values())

pie_trace = go.Pie(labels=labels, values=values)
fig_pie = go.Figure(data=[pie_trace])
fig_pie.update_layout(title_text="State Visitations Pie Chart")
st.plotly_chart(fig_pie)

# Heatmap of the grid with the column axis drawn along the top edge
fig_heatmap = px.imshow(
    grid,
    labels=dict(x="Column", y="Row", color="Visitations"),
    x=list(range(6)),
    y=list(range(6)),
    title="State Visitations Heatmap",
)
fig_heatmap.update_xaxes(side="top")
st.plotly_chart(fig_heatmap)

# Read the goal rows from disk
df = pd.read_csv('goal_rows.csv')

# One row per distinct State_2D value together with how often it occurs
visitation_counts = (
    df['State_2D']
    .value_counts()
    .rename_axis('State_2D')
    .reset_index(name='Visitation_Count')
)

# Bar chart of the counts with human-readable axis labels
axis_labels = {'State_2D': 'State 2D', 'Visitation_Count': 'Visitation Count'}
fig = px.bar(
    visitation_counts,
    x='State_2D',
    y='Visitation_Count',
    title='Goal Position Visitation Counts',
    labels=axis_labels,
)

# Section title first, then the chart itself
st.title('Goal Position Visitation Counts Visualization')
st.plotly_chart(fig)