File size: 4,105 Bytes
020b18b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
'''
Facade for reading logs on remote storage.
'''

from collections import defaultdict
import json
import os
from typing import Any
from azure.storage.fileshare import ShareServiceClient


class RemoteLogReader:
    '''
    Read-only facade over the Azure file share that holds conversation logs,
    sandbox logs and user uploaded images.

    Paths on the share are built as:
        {date}/conv_logs/{mode}/conv-log-*.json
        {date}/sandbox_logs/sandbox-log-*.json
        serve_images/{image_id}.png
    '''

    # Read once, when the class body is executed (i.e. at import time).
    LOG_CONNECTION_STRING = os.getenv("AZURE_STORAGE_CONNECTION_STRING") or ""
    LOG_SHARE_NAME = "swearenalogsfileshare"

    IMAGE_DIR_NAME = "serve_images"
    '''
    Directory for storing user uploaded images.
    '''
    CONV_LOG_DIR_NAME = "conv_logs"
    '''
    Directory for conversation logs.
    '''
    SANDBOX_LOG_DIR_NAME = "sandbox_logs"
    '''
    Directory for sandbox logs.
    '''

    CHAT_MODES = ["battle_anony", "battle_named", "direct"]

    def __init__(
        self,
        connection_string: str = LOG_CONNECTION_STRING,
        share_name: str = LOG_SHARE_NAME,
    ):
        '''
        Create the share client used by every read method.

        Args:
            connection_string: Azure storage connection string. Defaults to
                the AZURE_STORAGE_CONNECTION_STRING environment variable as
                it was when this module was imported.
            share_name: Name of the file share to read from.

        Raises:
            ValueError: If either argument is empty.
        '''
        if not connection_string:
            raise ValueError("Connection string is required.")
        if not share_name:
            raise ValueError("Share name is required.")

        self.share_service = ShareServiceClient.from_connection_string(
            conn_str=connection_string)
        self.share_client = self.share_service.get_share_client(share=share_name)

    def is_conv_log(self, file_name: str) -> bool:
        '''
        Return True if the file name looks like a conversation log.
        '''
        return file_name.startswith("conv-log") and file_name.endswith(".json")

    def get_conv_id_from_name(self, file_name: str) -> str:
        '''
        Extract the conversation id from a conversation log file name.

        The id is the third "-"-separated segment of the name with the
        ".json" extension removed, e.g. "conv-log-abc123.json" -> "abc123".

        Raises:
            IndexError: If the name has fewer than three "-"-separated parts.
        '''
        # removesuffix() drops exactly the ".json" extension. str.strip('.json')
        # would strip any of the characters '.', 'j', 's', 'o', 'n' from BOTH
        # ends and corrupt ids that start or end with one of them.
        return file_name.split("-")[2].removesuffix(".json")

    def is_sandbox_log(self, file_name: str) -> bool:
        '''
        Return True if the file name looks like a sandbox log.
        '''
        return file_name.startswith("sandbox-log") and file_name.endswith(".json")

    def get_file_content(self, file_path: str) -> bytes:
        '''
        Download the whole file at file_path (relative to the share root)
        and return its raw bytes.
        '''
        file_client = self.share_client.get_file_client(file_path)
        file_content = file_client.download_file().readall()
        return file_content

    def _read_text(self, file_path: str) -> str:
        '''
        Download a file and return it as UTF-8 text with surrounding spaces
        and newlines removed.
        '''
        return self.get_file_content(file_path).decode("utf-8").strip(' \n')

    def _directory_exists(self, dir_path: str) -> bool:
        '''
        Return True if dir_path exists on the share.
        '''
        return self.share_client.get_directory_client(dir_path).exists()

    def get_conv_logs(self, date: str) -> dict[str, defaultdict[str, list[Any]]]:
        '''
        Return conversation logs based on the date.
        Returns a dict:
            mode -> conv_id -> list of logs.

        Every mode in CHAT_MODES is present in the result; a mode whose
        directory does not exist for the date maps to an empty defaultdict.
        Each log file is read as JSON Lines (one JSON document per line).
        '''
        conv_logs = {
            mode: defaultdict(list) for mode in self.CHAT_MODES
        }
        for mode in self.CHAT_MODES:
            conv_log_dir = f"{date}/{self.CONV_LOG_DIR_NAME}/{mode}/"
            # Listing a missing directory raises, so skip modes without logs.
            if not self._directory_exists(conv_log_dir):
                continue
            for file in self.share_client.list_directories_and_files(conv_log_dir):
                if not self.is_conv_log(file.name):
                    continue
                conv_id = self.get_conv_id_from_name(file.name)
                file_content = self._read_text(conv_log_dir + file.name)
                # Split on '\n' only: str.splitlines() would also break on
                # characters that may legally appear inside a JSON string.
                for line in file_content.split('\n'):
                    if line:
                        conv_logs[mode][conv_id].append(json.loads(line))
        return conv_logs

    def get_sandbox_logs(self, date: str) -> list[Any]:
        '''
        Return sandbox logs based on the date.

        Each sandbox log file is parsed as a single JSON document. Returns an
        empty list when the date has no sandbox log directory.
        '''
        sandbox_logs = []
        sandbox_log_dir = f"{date}/{self.SANDBOX_LOG_DIR_NAME}/"
        # Same guard as get_conv_logs: a date without sandbox logs yields an
        # empty result instead of an error from listing a missing directory.
        if not self._directory_exists(sandbox_log_dir):
            return sandbox_logs
        for file in self.share_client.list_directories_and_files(sandbox_log_dir):
            if self.is_sandbox_log(file.name):
                file_content = self._read_text(sandbox_log_dir + file.name)
                sandbox_logs.append(json.loads(file_content))
        return sandbox_logs

    def get_image(self, image_id: str) -> bytes:
        '''
        Return image data based on the image id.

        The image is read from "{IMAGE_DIR_NAME}/{image_id}.png".
        '''
        image_path = f"{self.IMAGE_DIR_NAME}/{image_id}.png"
        return self.get_file_content(image_path)


if __name__ == "__main__":
    # Demo: pull one day's logs and one image from the default share.
    reader = RemoteLogReader()
    sample_date = "2025_02_20"
    # Fetch everything first, so nothing is printed if any download fails.
    fetched = (
        reader.get_conv_logs(sample_date),
        reader.get_sandbox_logs(sample_date),
        reader.get_image("051fdac24285ff6e219a9ba06d1ac843"),
    )
    for item in fetched:
        print(item)