File size: 3,044 Bytes
8573823
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import functools
import io
import json
import logging
import os
import time
from threading import Thread, Lock

import streamlit as st
from huggingface_hub import HfApi

from src.utils import get_current_strftime


# Module-wide lock guarding Logger's in-memory buffers and id counters; it is
# taken by the register_* methods and by the background sync loop.
logger_lock = Lock()


def threaded(fn):
    """Decorate ``fn`` so that each call runs it in a newly started thread.

    Args:
        fn: Callable to execute in the background.

    Returns:
        A wrapper that forwards its positional and keyword arguments to
        ``fn`` on a new ``threading.Thread``, starts that thread and returns
        the ``Thread`` object (not ``fn``'s return value), so callers may
        ``join()`` it if they need to wait.
    """
    # functools.wraps keeps fn's __name__/__doc__ on the wrapper so decorated
    # callables stay identifiable in tracebacks and introspection.
    @functools.wraps(fn)
    def wrapper(*args, **kwargs):
        thread = Thread(target=fn, args=args, kwargs=kwargs)
        thread.start()
        return thread
    return wrapper


class Logger:
    """In-memory usage logger that periodically uploads its records.

    Session, query and audio records are appended to per-kind lists while
    holding ``logger_lock``. A background thread started from ``__init__``
    wakes up every ``sync_interval`` seconds, drains each list and uploads
    its rows as newline-delimited JSON to the dataset repository named by the
    ``LOGGING_REPO_NAME`` environment variable, using the ``HF_TOKEN``
    environment variable as the access token.
    """

    # Attribute names of the buffers, in upload order. Each name doubles as
    # the top-level folder of the uploaded file inside the repository.
    _BUFFER_NAMES = ("session_data", "query_data", "audio_data")

    def __init__(self):
        """Initialise empty buffers and start the background sync thread."""
        # Value of get_current_strftime() at construction; prefixes every
        # session id handed out by this instance.
        self.app_id = get_current_strftime()
        self.session_increment = 0
        self.query_increment = 0
        # Seconds the background thread sleeps between upload rounds.
        self.sync_interval = 180

        self.session_data = []
        self.query_data = []
        self.audio_data = []

        # sync_data is wrapped by ``threaded``: this call starts the loop on
        # its own thread and returns immediately.
        self.sync_data()

    def register_session(self) -> str:
        """Record a new session and return its id.

        Returns:
            The new session id, formatted as ``"{app_id}+{counter}"``.
        """
        with logger_lock:
            # Read and bump the counter under the same lock acquisition so
            # concurrent callers can never be handed the same id.
            new_session_id = f"{self.app_id}+{self.session_increment}"
            self.session_increment += 1
            self.session_data.append({
                "session_id": new_session_id,
                "creation_time": get_current_strftime()
            })
        return new_session_id

    def register_query(self,
                       session_id,
                       base64_audio,
                       text_input,
                       response,
                       **kwargs
                       ):
        """Record one query together with its audio payload.

        Args:
            session_id: Id of the session the query belongs to.
            base64_audio: Audio payload stored under the ``"audio"`` key of
                the audio record (presumably base64 text, going by the
                name; it must be JSON-serialisable to be uploaded).
            text_input: Stored under the ``"text"`` key of the query record.
            response: Stored under the ``"response"`` key of the query record.
            **kwargs: Extra fields merged into the query record; a key that
                matches a standard field overrides it.
        """
        current_time = get_current_strftime()

        with logger_lock:
            # Allocate the id inside the lock so two concurrent queries
            # cannot share the same query_id.
            new_query_id = self.query_increment
            self.query_increment += 1

            current_query_data = {
                "session_id": session_id,
                "query_id": new_query_id,
                "creation_time": current_time,
                "text": text_input,
                "response": response,
            }
            current_query_data.update(kwargs)
            self.query_data.append(current_query_data)

            self.audio_data.append({
                "session_id": session_id,
                "query_id": new_query_id,
                "creation_time": current_time,
                "audio": base64_audio,
            })

    @staticmethod
    def _encode_rows(rows):
        """Serialise ``rows`` to UTF-8 JSON lines, skipping unencodable rows.

        Returns:
            A ``(kept_rows, payload)`` tuple: the rows that were encoded and
            their concatenated newline-terminated JSON as ``bytes``.
        """
        kept_rows = []
        encoded_lines = []
        for row in rows:
            try:
                line = (json.dumps(row, ensure_ascii=False) + "\n").encode("utf-8")
            except (TypeError, ValueError):
                # A row that cannot be encoded now never will be; drop it
                # (with a log entry) rather than block every later upload.
                logging.getLogger(__name__).exception(
                    "Dropping log record that cannot be encoded as JSON"
                )
                continue
            kept_rows.append(row)
            encoded_lines.append(line)
        return kept_rows, b"".join(encoded_lines)

    @threaded
    def sync_data(self):
        """Upload buffered records forever, one round per ``sync_interval``.

        Because of the ``threaded`` decorator, calling this method starts the
        loop on a new thread and returns that thread. A failed upload is
        logged and its rows are put back at the front of the buffer so the
        next round retries them; the loop itself keeps running.
        """
        api = HfApi()

        while True:
            time.sleep(self.sync_interval)

            for data_name in self._BUFFER_NAMES:
                # Swap the buffer for a fresh list while holding the lock so
                # the slow upload below never blocks register_* callers.
                with logger_lock:
                    pending = getattr(self, data_name, [])
                    setattr(self, data_name, [])

                if not pending:
                    continue

                rows, payload = self._encode_rows(pending)
                if not rows:
                    continue

                try:
                    # The context manager closes the buffer even when the
                    # upload raises.
                    with io.BytesIO() as buffer:
                        buffer.write(payload)
                        api.upload_file(
                            path_or_fileobj=buffer,
                            path_in_repo=f"{data_name}/{get_current_strftime()}.json",
                            repo_id=os.getenv("LOGGING_REPO_NAME"),
                            repo_type="dataset",
                            token=os.getenv('HF_TOKEN')
                        )
                except Exception:
                    # Top-level boundary of the background thread: an
                    # uncaught error here would end the loop and silently
                    # stop all future uploads.
                    logging.getLogger(__name__).exception(
                        "Failed to upload %d %s row(s); will retry on the next sync",
                        len(rows),
                        data_name,
                    )
                    with logger_lock:
                        # Older rows go first to keep chronological order.
                        setattr(self, data_name, rows + getattr(self, data_name, []))


@st.cache_resource()
def load_logger():
    """Build the application's ``Logger``.

    The function is wrapped in ``st.cache_resource``, so the instance created
    here is cached by Streamlit instead of being rebuilt on each call.
    """
    shared_logger = Logger()
    return shared_logger