Spaces:

TroglodyteDerivations
/

Meta_Learning_With_HRL_Homer_Simpson_Lecture

Running

Meta_Learning_With_HRL_Homer_Simpson_Lecture / app.py

Updated lines 45-86 with: # Display the formulation with parameters plugged in st.write("Example 2 via Method 2:") st.latex(r""" r_{t}^{int} \eta \frac{1}{\sqrt{N(s_{t}) + \epsilon}} = 0.1 \frac{1}{\sqrt{2 + 1 x 10^{-5}}} """) # Abstract Base Class for Intrinsic Reward Calculation class IntrinsicRewardCalculator(ABC): @abstractmethod def calculate_intrinsic_reward(self, eta, count, epsilon): pass # Concrete Class for Intrinsic Reward Calculation class ConcreteIntrinsicRewardCalculator(IntrinsicRewardCalculator): def calculate_intrinsic_reward(self, eta, count, epsilon): return eta * (1 / np.sqrt(count + epsilon)) def populate_df_0_0(self, df_0_0, eta, count, epsilon): intrinsic_reward = self.calculate_intrinsic_reward(eta, count, epsilon) df_0_0.at[0, 'Intrinsic Reward'] = intrinsic_reward return df_0_0 # Example 2 parameters eta = 0.1 count = 2 epsilon = 1e-5 x,y = 0,0 # Create instance for Intrinsic Reward Calculation irc = ConcreteIntrinsicRewardCalculator() intrinsic_reward = irc.calculate_intrinsic_reward(0.1, 2, 1e-5) st.write(f"Intrinsic Reward @ {count} @ Coordinates {x,y}:", intrinsic_reward) st.write(f"Intrinsic Reward @ {count} @ Coordinates {x,y} rounded 6 decimal places:", np.round(intrinsic_reward,6)) # Populate the DataFrame with the calculated intrinsic reward df_0_0 = irc.populate_df_0_0(df_0_0, eta, count, epsilon) # Display the updated DataFrame st.write(df_0_0[1:2])

94c96eb verified 1 day ago

raw

history blame

4.87 kB

	import streamlit as st
	import pandas as pd
	import plotly.graph_objects as go
	import plotly.express as px
	import numpy as np
	from abc import ABC, abstractmethod


	# Set the title of the app
	st.title("Homer Simpson Meta-Learning with Hierarchical Reinforcement Learning Intrinsic Reward Lecture")

	# Display the image with a caption
	# NOTE(review): `use_column_width` is reported as deprecated in recent
	# Streamlit releases -- confirm against the Streamlit version this Space pins.
	st.image("homer.webp", caption="Homer Simpson Meta-Learning HRL Lecture", use_column_width=True)

	# Display and play the audio files
	# The three clips are rendered in page order; the download hint is placed
	# between the second and third player.
	st.write("Audio Playback Meta-Learning with HRL Intrinsic Reward Lecture:")
	st.audio("h0.wav", format="audio/wav")
	st.audio("h1.wav", format="audio/wav")
	st.write("Oh, sweet Homer's doughnuts! If that second .wav file ain't playin', just download the darn thing! Mmm... downloads...")
	st.audio("h2.wav", format="audio/wav")
	# Static picture of the formula, followed by the headings for Example 1.
	st.image("intrinsic_reward_formulation.png", caption='Intrinsic Reward Formulation')
	st.write("Solving the first 5 equations @ (0,0):")
	st.write("Example 1 via Method 1:")

	# Load the table used by the examples below (relative path) and show its
	# (rows, columns) shape on the page.
	df_0_0 = pd.read_csv('df_0_0.csv')
	st.write(df_0_0.shape)

	# Example 1 (Method 1): evaluate the intrinsic-reward formula inline.
	# eta scales the reward, N_st is the visit count plugged into the formula,
	# and epsilon is the small constant added to the count under the square root.
	eta = 0.1
	N_st = 1
	epsilon = 1e-5

	# Intrinsic reward formulation: r_t^int = eta * 1 / sqrt(N(s_t) + epsilon)
	r_t_int = eta * (1 / (N_st + epsilon)**0.5)

	# Display the formulation with parameters plugged in.
	# The rendered formula needs the "=" between r_t^{int} and \eta, and
	# \times for the scientific-notation product (a bare "x" renders as a
	# variable named x).
	st.latex(r"""
	r_{t}^{int} = \eta \frac{1}{\sqrt{N(s_{t}) + \epsilon}} = 0.1 \frac{1}{\sqrt{1 + 1 \times 10^{-5}}}
	""")

	# Show the numeric result next to the first row of the loaded table.
	st.write(f"Calculated intrinsic reward: {r_t_int}")
	st.dataframe(df_0_0[:1])


	# Example 2 (Method 2): display the formulation with parameters plugged in.
	# The "=" after r_t^{int} and \times in the scientific notation make the
	# rendered formula read as an equation (a bare "x" renders as a variable).
	st.write("Example 2 via Method 2:")
	st.latex(r"""
	r_{t}^{int} = \eta \frac{1}{\sqrt{N(s_{t}) + \epsilon}} = 0.1 \frac{1}{\sqrt{2 + 1 \times 10^{-5}}}
	""")


	# Abstract Base Class for Intrinsic Reward Calculation
	class IntrinsicRewardCalculator(ABC):
	    """Interface for objects that compute an intrinsic reward."""

	    @abstractmethod
	    def calculate_intrinsic_reward(self, eta: float, count: float, epsilon: float) -> float:
	        """Return the intrinsic reward for the given parameters.

	        Args:
	            eta: Scaling factor applied to the reward.
	            count: Visit count N(s_t) of the state.
	            epsilon: Constant added to the count under the square root.
	        """


	# Concrete Class for Intrinsic Reward Calculation
	class ConcreteIntrinsicRewardCalculator(IntrinsicRewardCalculator):
	    """Computes ``eta * 1 / sqrt(count + epsilon)``."""

	    def calculate_intrinsic_reward(self, eta: float, count: float, epsilon: float) -> float:
	        """Return ``eta * (1 / sqrt(count + epsilon))``.

	        Args:
	            eta: Scaling factor applied to the reward.
	            count: Visit count N(s_t) of the state.
	            epsilon: Constant added to the count under the square root.
	        """
	        return eta * (1 / np.sqrt(count + epsilon))

	    def populate_df_0_0(
	        self,
	        df_0_0: pd.DataFrame,
	        eta: float,
	        count: float,
	        epsilon: float,
	        row: int = 0,
	    ) -> pd.DataFrame:
	        """Store the computed reward in the 'Intrinsic Reward' column.

	        The frame is modified in place and also returned, so callers may
	        either rebind the result or ignore it.

	        Args:
	            df_0_0: Frame that receives the value.
	            eta: Scaling factor applied to the reward.
	            count: Visit count N(s_t) of the state.
	            epsilon: Constant added to the count under the square root.
	            row: Index label of the row to write; defaults to 0, the row
	                that was previously hard-coded.

	        Returns:
	            The same ``df_0_0`` object that was passed in.
	        """
	        intrinsic_reward = self.calculate_intrinsic_reward(eta, count, epsilon)
	        df_0_0.at[row, 'Intrinsic Reward'] = intrinsic_reward
	        return df_0_0

	# Example 2 parameters
	eta = 0.1
	count = 2
	epsilon = 1e-5
	x, y = 0, 0

	# Create instance for Intrinsic Reward Calculation.
	# The named parameters are passed (rather than repeating the literals) so
	# the displayed count, the computed reward and the stored value cannot
	# drift apart when a parameter is edited.
	irc = ConcreteIntrinsicRewardCalculator()
	intrinsic_reward = irc.calculate_intrinsic_reward(eta, count, epsilon)

	# Show the raw value and the value rounded to 6 decimal places.
	st.write(f"Intrinsic Reward @ {count} @ Coordinates {x,y}:", intrinsic_reward)
	st.write(f"Intrinsic Reward @ {count} @ Coordinates {x,y} rounded 6 decimal places:", np.round(intrinsic_reward,6))

	# Populate the DataFrame with the calculated intrinsic reward
	# NOTE(review): populate_df_0_0 writes to row label 0, while the slice
	# displayed below is row position 1 -- confirm which row Example 2 is
	# meant to update.
	df_0_0 = irc.populate_df_0_0(df_0_0, eta, count, epsilon)

	# Display the updated DataFrame
	st.write(df_0_0[1:2])




	# Define the grid and visitations
	GRID_SIZE = 6  # the grid is GRID_SIZE x GRID_SIZE

	grid = np.zeros((GRID_SIZE, GRID_SIZE))
	# Visit count per state, keyed by the (first index, second index) pair used
	# to address `grid`. State (5, 5) has no entry, so its cell stays at 0.
	visitations = {
	    (0, 0): 7035, (1, 0): 3579, (2, 0): 1359, (2, 1): 1707, (3, 1): 520, (4, 1): 227,
	    (4, 2): 243, (5, 1): 217, (5, 2): 181, (5, 0): 241, (4, 0): 267, (5, 3): 179,
	    (4, 3): 1034, (3, 3): 2163, (2, 3): 2080, (0, 1): 3313, (1, 1): 3015, (0, 2): 1846,
	    (0, 3): 1104, (0, 4): 351, (1, 4): 518, (1, 3): 1497, (1, 2): 2236, (2, 2): 2239,
	    (2, 4): 842, (1, 5): 238, (2, 5): 217, (0, 5): 341, (3, 5): 382, (4, 5): 1872,
	    (4, 4): 2038, (3, 4): 1684, (3, 0): 383, (3, 2): 1102, (5, 4): 198,
	}

	# Fill the grid with visitations.
	# Dedicated loop names are used so the script-level x, y and count set for
	# Example 2 are not overwritten by this loop.
	for (row, col), visits in visitations.items():
	    grid[row, col] = visits

	# Calculate the total number of visitations
	total_visitations = sum(visitations.values())

	# Calculate the percentages (share of all visits, in percent, per state)
	percentages = {
	    state: (visits / total_visitations) * 100
	    for state, visits in visitations.items()
	}

	# Display order for the percentages: every grid cell in row-major order,
	# (0, 0), (0, 1), ..., (5, 5), generated instead of written out by hand.
	order = [(row, col) for row in range(GRID_SIZE) for col in range(GRID_SIZE)]

	# Section heading for the visitation statistics.
	st.title("State Visitations Visualization")

	# One line per state, following `order`; states without a recorded
	# percentage are shown as 0.
	st.write("### State Visitations Percentages:")
	for state in order:
	    st.write(f"State {state}: {percentages.get(state, 0):.2f}%")

	# Pie chart: one slice per entry of `visitations`, sized by its count.
	# Labels and values are both taken in the dict's iteration order, so they
	# line up pairwise.
	labels = [f"State {state}" for state in visitations]
	values = list(visitations.values())

	fig_pie = go.Figure(data=[go.Pie(labels=labels, values=values)])
	fig_pie.update_layout(title_text="State Visitations Pie Chart")
	st.plotly_chart(fig_pie)

	# Heatmap of the filled grid, with the x axis drawn along the top edge.
	fig_heatmap = px.imshow(
	    grid,
	    labels={"x": "Column", "y": "Row", "color": "Visitations"},
	    x=list(range(6)),
	    y=list(range(6)),
	    title="State Visitations Heatmap",
	)
	fig_heatmap.update_xaxes(side="top")
	st.plotly_chart(fig_heatmap)