''' Facade for reading logs on remote storage. '''

from collections import defaultdict
import json
import os
from typing import Any

from azure.storage.fileshare import ShareServiceClient


class RemoteLogReader:
    '''
    Read-only facade over the Azure file share that stores the logs.

    The share is laid out as:
        <date>/conv_logs/<chat mode>/conv-log-*.json   (one JSON object per line)
        <date>/sandbox_logs/sandbox-log-*.json         (one JSON document per file)
        serve_images/<image id>.png
    '''

    # Read once, when the class body is executed; later changes to the
    # environment variable do not affect the constructor default.
    LOG_CONNECTION_STRING = os.getenv("AZURE_STORAGE_CONNECTION_STRING") or ""
    LOG_SHARE_NAME = "swearenalogsfileshare"
    # Directory for storing user uploaded images.
    IMAGE_DIR_NAME = "serve_images"
    # Directory for conversation logs.
    CONV_LOG_DIR_NAME = "conv_logs"
    # Directory for sandbox logs.
    SANDBOX_LOG_DIR_NAME = "sandbox_logs"
    # Sub-directories of the conversation log directory, one per chat mode.
    CHAT_MODES = ["battle_anony", "battle_named", "direct"]

    def __init__(
        self,
        connection_string: str = LOG_CONNECTION_STRING,
        share_name: str = LOG_SHARE_NAME,
    ):
        '''
        Create the share client used by every read method.

        Args:
            connection_string: storage account connection string.
            share_name: name of the file share that holds the logs.

        Raises:
            ValueError: if either argument is empty.
        '''
        if not connection_string:
            raise ValueError("Connection string is required.")
        if not share_name:
            raise ValueError("Share name is required.")

        self.share_service = ShareServiceClient.from_connection_string(
            conn_str=connection_string)
        self.share_client = self.share_service.get_share_client(share=share_name)

    def is_conv_log(self, file_name: str) -> bool:
        ''' Return True if the file name looks like a conversation log. '''
        return file_name.startswith("conv-log") and file_name.endswith(".json")

    def get_conv_id_from_name(self, file_name: str) -> str:
        '''
        Extract the conversation id from a conversation log file name.

        The id is the third "-"-separated segment of the name with a
        trailing ".json" removed, e.g. "conv-log-abc123.json" -> "abc123".

        Raises:
            IndexError: if the name has fewer than three "-"-separated parts.
        '''
        # removesuffix() drops the literal extension only. str.strip('.json')
        # would strip any of the characters ".json" from BOTH ends of the id
        # (e.g. "session" -> "essi").
        return file_name.split("-")[2].removesuffix(".json")

    def is_sandbox_log(self, file_name: str) -> bool:
        ''' Return True if the file name looks like a sandbox log. '''
        return file_name.startswith("sandbox-log") and file_name.endswith(".json")

    def get_file_content(self, file_path: str) -> bytes:
        ''' Download the file at the given share-relative path and return its bytes. '''
        file_client = self.share_client.get_file_client(file_path)
        return file_client.download_file().readall()

    def get_conv_logs(self, date: str) -> dict[str, defaultdict[str, list[Any]]]:
        '''
        Return conversation logs based on the date.

        Args:
            date: name of the top-level date directory (e.g. "2025_02_20").

        Returns:
            A dict: mode -> conv_id -> list of logs. Every chat mode is
            present as a key; a mode whose directory does not exist maps to
            an empty defaultdict.
        '''
        conv_logs: dict[str, defaultdict[str, list[Any]]] = {
            mode: defaultdict(list) for mode in self.CHAT_MODES
        }
        for mode in self.CHAT_MODES:
            conv_log_dir = f"{date}/{self.CONV_LOG_DIR_NAME}/{mode}/"
            # Skip modes that have no directory for this date.
            if not self.share_client.get_directory_client(conv_log_dir).exists():
                continue
            for file in self.share_client.list_directories_and_files(conv_log_dir):
                if not self.is_conv_log(file.name):
                    continue
                conv_id = self.get_conv_id_from_name(file.name)
                file_content = self.get_file_content(
                    conv_log_dir + file.name).decode("utf-8")
                # One JSON object per line. Split on "\n" only (not
                # splitlines()) so a record is never cut at some other
                # Unicode line separator, and strip each line so blank or
                # "\r"-only lines are skipped rather than fed to json.loads.
                for raw_line in file_content.split('\n'):
                    line = raw_line.strip()
                    if line:
                        conv_logs[mode][conv_id].append(json.loads(line))
        return conv_logs

    def get_sandbox_logs(self, date: str) -> list[Any]:
        '''
        Return sandbox logs based on the date.

        Args:
            date: name of the top-level date directory (e.g. "2025_02_20").

        Returns:
            The parsed JSON content of every sandbox log file for the date;
            an empty list if the date has no sandbox log directory.
        '''
        sandbox_logs: list[Any] = []
        sandbox_log_dir = f"{date}/{self.SANDBOX_LOG_DIR_NAME}/"
        # Same guard as get_conv_logs: a date without sandbox logs yields an
        # empty result instead of an error from listing a missing directory.
        if not self.share_client.get_directory_client(sandbox_log_dir).exists():
            return sandbox_logs
        for file in self.share_client.list_directories_and_files(sandbox_log_dir):
            if not self.is_sandbox_log(file.name):
                continue
            file_content = self.get_file_content(
                sandbox_log_dir + file.name).decode("utf-8").strip(' \n')
            # Each sandbox log file holds a single JSON document.
            sandbox_logs.append(json.loads(file_content))
        return sandbox_logs

    def get_image(self, image_id: str) -> bytes:
        ''' Return image data based on the image id. '''
        image_path = f"{self.IMAGE_DIR_NAME}/{image_id}.png"
        return self.get_file_content(image_path)


if __name__ == "__main__":
    # Example usages
    log_reader = RemoteLogReader()
    date = "2025_02_20"
    conv_logs = log_reader.get_conv_logs(date)
    sandbox_logs = log_reader.get_sandbox_logs(date)
    image_data = log_reader.get_image("051fdac24285ff6e219a9ba06d1ac843")
    print(conv_logs)
    print(sandbox_logs)
    print(image_data)