import os
from typing import Callable, Optional

import psutil
from pympler.asizeof import asizeof
from tensorboardX import SummaryWriter

def random_collect(
        policy_cfg: 'EasyDict',  # noqa
        policy: 'Policy',  # noqa
        RandomPolicy: 'Policy',  # noqa
        collector: 'ISerialCollector',  # noqa
        collector_env: 'BaseEnvManager',  # noqa
        replay_buffer: 'IBuffer',  # noqa
        postprocess_data_fn: Optional[Callable] = None
) -> None:  # noqa
    """
    Overview:
        Collect ``policy_cfg.random_collect_episode_num`` episodes with a random policy, push the
        resulting game segments into ``replay_buffer``, then restore the original collect-mode policy.
    """
    assert policy_cfg.random_collect_episode_num > 0
    random_policy = RandomPolicy(cfg=policy_cfg, action_space=collector_env.env_ref.action_space)
    # Swap the collector's policy for the random policy.
    collector.reset_policy(random_policy.collect_mode)
    # The temperature of the visit-count distribution is fixed at 1 during random collection
    # (see Appendix D of the MuZero paper for details), and no epsilon-greedy exploration is used.
    collect_kwargs = {'temperature': 1, 'epsilon': 0.0}
    # Collect the configured number of random episodes.
    new_data = collector.collect(
        n_episode=policy_cfg.random_collect_episode_num, train_iter=0, policy_kwargs=collect_kwargs
    )
    if postprocess_data_fn is not None:
        new_data = postprocess_data_fn(new_data)
    # Push the collected game segments into the replay buffer.
    replay_buffer.push_game_segments(new_data)
    # Remove the oldest data if the replay buffer is full.
    replay_buffer.remove_oldest_data_to_fit()
    # Restore the original policy on the collector.
    collector.reset_policy(policy.collect_mode)
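
# Example (sketch): in a MuZero-style training entry, `random_collect` is typically called once
# before the main collect/train loop to warm up the replay buffer. The names below (`cfg`,
# `policy`, `collector`, `collector_env`, `replay_buffer`) are assumed to come from an already
# configured pipeline and are illustrative only:
#
#     if cfg.policy.random_collect_episode_num > 0:
#         random_collect(cfg.policy, policy, RandomPolicy, collector, collector_env, replay_buffer)
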
def log_buffer_memory_usage(train_iter: int, buffer: "GameBuffer", writer: SummaryWriter) -> None:
    """
    Overview:
        Log the memory usage of the buffer and the current process to TensorBoard.
    Arguments:
        - train_iter (:obj:`int`): The current training iteration.
        - buffer (:obj:`GameBuffer`): The game buffer.
        - writer (:obj:`SummaryWriter`): The TensorBoard writer.
    """
    writer.add_scalar('Buffer/num_of_all_collected_episodes', buffer.num_of_collected_episodes, train_iter)
    writer.add_scalar('Buffer/num_of_game_segments', len(buffer.game_segment_buffer), train_iter)
    writer.add_scalar('Buffer/num_of_transitions', len(buffer.game_segment_game_pos_look_up), train_iter)

    game_segment_buffer = buffer.game_segment_buffer

    # Calculate the amount of memory occupied by buffer.game_segment_buffer (in bytes).
    buffer_memory_usage = asizeof(game_segment_buffer)
    # Convert buffer_memory_usage to megabytes (MB).
    buffer_memory_usage_mb = buffer_memory_usage / (1024 * 1024)
    # Record the memory usage of buffer.game_segment_buffer to TensorBoard.
    writer.add_scalar('Buffer/memory_usage/game_segment_buffer', buffer_memory_usage_mb, train_iter)

    # Get the amount of memory currently used by the process (in bytes).
    process = psutil.Process(os.getpid())
    process_memory_usage = process.memory_info().rss
    # Convert process_memory_usage to megabytes (MB).
    process_memory_usage_mb = process_memory_usage / (1024 * 1024)
    # Record the memory usage of the process to TensorBoard.
    writer.add_scalar('Buffer/memory_usage/process', process_memory_usage_mb, train_iter)
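
# Minimal usage sketch, assuming `tensorboardX` and `psutil` are installed. `_DummyBuffer` is a
# hypothetical stand-in that exposes only the attributes `log_buffer_memory_usage` reads
# (`num_of_collected_episodes`, `game_segment_buffer`, `game_segment_game_pos_look_up`);
# it is not the real GameBuffer.
if __name__ == '__main__':

    class _DummyBuffer:

        def __init__(self) -> None:
            self.num_of_collected_episodes = 2
            self.game_segment_buffer = [list(range(100)) for _ in range(2)]
            self.game_segment_game_pos_look_up = [(0, i) for i in range(200)]

    demo_writer = SummaryWriter('./log/buffer_memory_demo')
    log_buffer_memory_usage(train_iter=0, buffer=_DummyBuffer(), writer=demo_writer)
    demo_writer.close()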