TroglodyteDerivations
committed on
Commit
•
9ff167f
1
Parent(s):
b288f24
Updated lines 153-189 with: # Display the formulation with parameters plugged in st.write("Last Example in df_0_0 DataFrame 7035 via Method 2:") st.latex(r""" r_{t}^{int} \eta \frac{1}{\sqrt{N(s_{t}) + \epsilon}} = 0.1 \frac{1}{\sqrt{7035 + 1(10^{-5})}} """) # Abstract Base Class for Intrinsic Reward Calculation class IntrinsicRewardCalculator(ABC): @abstractmethod def calculate_intrinsic_reward(self, eta, count, epsilon): pass # Concrete Class for Intrinsic Reward Calculation class ConcreteIntrinsicRewardCalculator(IntrinsicRewardCalculator): def calculate_intrinsic_reward(self, eta, count, epsilon): return eta * (1 / np.sqrt(count + epsilon)) def populate_df_0_0(self, df_0_0, eta, count, epsilon): intrinsic_reward = self.calculate_intrinsic_reward(eta, count, epsilon) df_0_0.at[0, 'Intrinsic Reward'] = intrinsic_reward return df_0_0 # Example 4 parameters eta = 0.1 count = 7035 epsilon = 1e-5 x,y = 0,0 # Create instance for Intrinsic Reward Calculation irc = ConcreteIntrinsicRewardCalculator() intrinsic_reward = irc.calculate_intrinsic_reward(0.1, 7035, 1e-5) st.write(f"Intrinsic Reward @ {count} @ Coordinates {x,y}:", intrinsic_reward) st.write(f"Intrinsic Reward @ {count} @ Coordinates {x,y} rounded 2 decimal places:", np.round(intrinsic_reward,2)) # Populate the DataFrame with the calculated intrinsic reward df_0_0 = irc.populate_df_0_0(df_0_0, eta, count, epsilon) # Display the updated DataFrame st.dataframe(df_0_0[7034:7035])
Browse files
app.py
CHANGED
@@ -148,7 +148,45 @@ st.write(f"Calculated intrinsic reward: {r_t_int}")
|
|
148 |
st.write(f"Calculated intrinsic reward rounded 4 decimal places:", np.round(r_t_int,4))
|
149 |
st.dataframe(df_0_0[4:5])
|
150 |
|
151 |
-
st.write("Oh, sweet Krusty-licious! At coordinates (0,0) for that plotly visualization, we need a whopping
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
152 |
|
153 |
|
154 |
|
|
|
148 |
st.write(f"Calculated intrinsic reward rounded 4 decimal places:", np.round(r_t_int,4))
|
149 |
st.dataframe(df_0_0[4:5])
|
150 |
|
151 |
+
st.write("Oh, sweet Krusty-licious! At coordinates (0,0) for that plotly visualization, we need a whopping 7036 intrinsic reward calculations to get things rollin'! And don't forget to update those State Visitations. Those were just the first five. Mmm... 7031 more to go... remember the oscillation starts at zero not 1 D'oh!")

# Display the formulation with parameters plugged in
st.write("Last Example in df_0_0 DataFrame 7035 via Method 2:")
# The "=" after r_{t}^{int} is required: without it the rendered formula reads
# as the product r_t^int * eta * (...) instead of the definition of r_t^int.
st.latex(r"""
r_{t}^{int} = \eta \frac{1}{\sqrt{N(s_{t}) + \epsilon}} = 0.1 \frac{1}{\sqrt{7035 + 1(10^{-5})}}
""")
|
158 |
+
|
159 |
+
# Abstract Base Class for Intrinsic Reward Calculation
|
160 |
+
class IntrinsicRewardCalculator(ABC):
    """Interface for objects that compute an intrinsic reward value."""

    @abstractmethod
    def calculate_intrinsic_reward(self, eta, count, epsilon):
        """Return the intrinsic reward for the given parameters.

        Args:
            eta: Scaling factor applied to the reward.
            count: The N(s_t) term of the displayed formula.
            epsilon: Small constant added to ``count`` under the square root.
        """


# Concrete Class for Intrinsic Reward Calculation
class ConcreteIntrinsicRewardCalculator(IntrinsicRewardCalculator):
    """Compute ``eta * 1 / sqrt(count + epsilon)`` and store it in a DataFrame."""

    def calculate_intrinsic_reward(self, eta, count, epsilon):
        """Return ``eta * (1 / sqrt(count + epsilon))``.

        Args:
            eta: Scaling factor applied to the reward.
            count: The N(s_t) term of the formula.
            epsilon: Small constant added to ``count`` under the square root.

        Returns:
            The reward as computed by NumPy (``np.sqrt`` is applied to
            ``count + epsilon``).
        """
        return eta * (1 / np.sqrt(count + epsilon))

    def populate_df_0_0(self, df_0_0, eta, count, epsilon, row=0):
        """Write the calculated reward into the 'Intrinsic Reward' column.

        The DataFrame is modified in place and also returned, so the call can
        be used either as a statement or in an assignment.

        Args:
            df_0_0: DataFrame with an 'Intrinsic Reward' column.
            eta: Scaling factor passed to ``calculate_intrinsic_reward``.
            count: Count passed to ``calculate_intrinsic_reward``.
            epsilon: Epsilon passed to ``calculate_intrinsic_reward``.
            row: Row label to write to. Defaults to 0, which is the row the
                original implementation always wrote to.

        Returns:
            The same ``df_0_0`` object that was passed in.
        """
        df_0_0.at[row, 'Intrinsic Reward'] = self.calculate_intrinsic_reward(
            eta, count, epsilon
        )
        return df_0_0
|
174 |
+
|
175 |
+
# Example 4 parameters
eta = 0.1
count = 7035
epsilon = 1e-5
x,y = 0,0

# Create instance for Intrinsic Reward Calculation
irc = ConcreteIntrinsicRewardCalculator()
# Use the named parameters instead of repeating the literals, so the value
# shown here always matches the one written into the DataFrame below.
intrinsic_reward = irc.calculate_intrinsic_reward(eta, count, epsilon)
st.write(f"Intrinsic Reward @ {count} @ Coordinates {x,y}:", intrinsic_reward)
st.write(f"Intrinsic Reward @ {count} @ Coordinates {x,y} rounded 2 decimal places:", np.round(intrinsic_reward,2))
# Populate the DataFrame with the calculated intrinsic reward
df_0_0 = irc.populate_df_0_0(df_0_0, eta, count, epsilon)
# Display the updated DataFrame
# NOTE(review): populate_df_0_0 writes to row label 0, while this slice shows
# the row at position 7034 -- confirm which row is meant to be updated.
st.dataframe(df_0_0[7034:7035])
|
190 |
|
191 |
|
192 |
|