pedroferreira committed on
Commit
f6cfbcb
·
1 Parent(s): 89a4057

adds documentation to files

Browse files
validators/database.py CHANGED
@@ -6,11 +6,40 @@ from .streamer import ProcessedStreamResponse
6
 
7
 
8
  class LogDatabase:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  def __init__(self, log_database_path: str):
 
 
 
 
 
 
10
  self.log_database_path = log_database_path
11
  self.ensure_db_exists(log_database_path)
12
 
13
  def ensure_db_exists(self, file_path):
 
 
 
 
 
 
14
  if not os.path.exists(file_path):
15
  # Create an empty JSONL file
16
  with open(file_path, "w") as file:
@@ -21,6 +50,15 @@ class LogDatabase:
21
  bt.logging.info(f"File '{file_path}' already exists.")
22
 
23
  async def add_streams_to_db(self, stream_responses: ProcessedStreamResponse):
 
 
 
 
 
 
 
 
 
24
  bt.logging.info(f"Writing streams to the database...")
25
  try:
26
  stream_responses_dict = [
@@ -35,6 +73,13 @@ class LogDatabase:
35
  raise e
36
 
37
  async def append_dicts_to_file(self, file_path, dictionaries):
 
 
 
 
 
 
 
38
  async with aiofiles.open(file_path, mode="a") as file:
39
  for dictionary in dictionaries:
40
  await file.write(json.dumps(dictionary) + "\n")
 
6
 
7
 
8
  class LogDatabase:
9
+ """
10
+ A class to manage a log database stored as a JSONL (JSON Lines) file.
11
+
12
+ Attributes:
13
+ log_database_path (str): The path to the log database file.
14
+
15
+ Methods:
16
+ ensure_db_exists(file_path):
17
+ Ensures that the log database file exists. If it doesn't, an empty file is created.
18
+
19
+ add_streams_to_db(stream_responses: ProcessedStreamResponse):
20
+ Asynchronously adds stream responses to the log database.
21
+
22
+ append_dicts_to_file(file_path, dictionaries):
23
+ Asynchronously appends a list of dictionaries to the specified file.
24
+ """
25
+
26
  def __init__(self, log_database_path: str):
27
+ """
28
+ Initializes the LogDatabase with the given log database file path.
29
+
30
+ Args:
31
+ log_database_path (str): The path to the log database file.
32
+ """
33
  self.log_database_path = log_database_path
34
  self.ensure_db_exists(log_database_path)
35
 
36
  def ensure_db_exists(self, file_path):
37
+ """
38
+ Ensures that the log database file exists. If it doesn't, creates an empty JSONL file.
39
+
40
+ Args:
41
+ file_path (str): The path to the log database file.
42
+ """
43
  if not os.path.exists(file_path):
44
  # Create an empty JSONL file
45
  with open(file_path, "w") as file:
 
50
  bt.logging.info(f"File '{file_path}' already exists.")
51
 
52
  async def add_streams_to_db(self, stream_responses: ProcessedStreamResponse):
53
+ """
54
+ Asynchronously adds stream responses to the log database.
55
+
56
+ Args:
57
+ stream_responses (ProcessedStreamResponse): A list of processed stream responses to add to the log database.
58
+
59
+ Raises:
60
+ Exception: If an error occurs while adding streams to the database.
61
+ """
62
  bt.logging.info(f"Writing streams to the database...")
63
  try:
64
  stream_responses_dict = [
 
73
  raise e
74
 
75
  async def append_dicts_to_file(self, file_path, dictionaries):
76
+ """
77
+ Asynchronously appends a list of dictionaries to the specified file.
78
+
79
+ Args:
80
+ file_path (str): The path to the file where dictionaries will be appended.
81
+ dictionaries (list): A list of dictionaries to append to the file.
82
+ """
83
  async with aiofiles.open(file_path, mode="a") as file:
84
  for dictionary in dictionaries:
85
  await file.write(json.dumps(dictionary) + "\n")
validators/stream_manager.py CHANGED
@@ -7,7 +7,24 @@ from aiohttp.web import Request
7
 
8
 
9
  class StreamManager:
 
 
 
 
 
 
 
 
 
 
 
10
  def __init__(self, log_database_path: str = "requests_db.jsonl"):
 
 
 
 
 
 
11
  self.log_database = LogDatabase(log_database_path)
12
 
13
  async def process_streams(
@@ -16,6 +33,17 @@ class StreamManager:
16
  streams_responses: List[AsyncIterator],
17
  stream_uids: List[int],
18
  ):
 
 
 
 
 
 
 
 
 
 
 
19
  lock = asyncio.Lock()
20
 
21
  streamers = [
 
7
 
8
 
9
  class StreamManager:
10
+ """
11
+ A class to manage the processing of multiple asynchronous data streams and log their responses.
12
+
13
+ Attributes:
14
+ log_database (LogDatabase): The log database to store stream responses.
15
+
16
+ Methods:
17
+ process_streams(request, streams_responses, stream_uids):
18
+ Processes multiple asynchronous streams, logs their responses, and returns the selected stream response.
19
+ """
20
+
21
  def __init__(self, log_database_path: str = "requests_db.jsonl"):
22
+ """
23
+ Initializes the StreamManager with the given log database file path.
24
+
25
+ Args:
26
+ log_database_path (str): The path to the log database file, defaults to "requests_db.jsonl".
27
+ """
28
  self.log_database = LogDatabase(log_database_path)
29
 
30
  async def process_streams(
 
33
  streams_responses: List[AsyncIterator],
34
  stream_uids: List[int],
35
  ):
36
+ """
37
+ Processes multiple asynchronous streams, logs their responses, and returns the selected stream response (stream from first non-empty chunk).
38
+
39
+ Args:
40
+ request (Request): The web request object.
41
+ streams_responses (List[AsyncIterator]): A list of asynchronous iterators representing the streams.
42
+ stream_uids (List[int]): A list of unique IDs for the streams.
43
+
44
+ Returns:
45
+ ProcessedStreamResponse: The response from the selected stream.
46
+ """
47
  lock = asyncio.Lock()
48
 
49
  streamers = [
validators/streamer.py CHANGED
@@ -11,6 +11,18 @@ from prompting.protocol import StreamPromptingSynapse
11
 
12
 
13
  class StreamChunk(BaseModel):
 
 
 
 
 
 
 
 
 
 
 
 
14
  delta: str
15
  finish_reason: Optional[str]
16
  accumulated_chunks: List[str]
@@ -20,11 +32,29 @@ class StreamChunk(BaseModel):
20
  selected_uid: int
21
 
22
  def encode(self, encoding: str) -> bytes:
 
 
 
 
 
 
 
 
 
23
  data = json.dumps(self.dict(), indent=4)
24
  return data.encode(encoding)
25
 
26
 
27
  class StreamError(BaseModel):
 
 
 
 
 
 
 
 
 
28
  error: str
29
  timestamp: str
30
  sequence_number: int
@@ -39,6 +69,20 @@ ProcessedStreamResponse = Union[StreamChunk, StreamError]
39
 
40
 
41
  class AsyncResponseDataStreamer:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  def __init__(
43
  self,
44
  async_iterator: AsyncIterator,
@@ -59,6 +103,15 @@ class AsyncResponseDataStreamer:
59
  def ensure_response_is_created(
60
  self, initiated_response: web.StreamResponse
61
  ) -> web.StreamResponse:
 
 
 
 
 
 
 
 
 
62
  # Creates response if it was not created
63
  if initiated_response == None:
64
  initiated_response = web_response.StreamResponse(status=200, reason="OK")
@@ -74,6 +127,18 @@ class AsyncResponseDataStreamer:
74
  stream_chunk: StreamChunk,
75
  lock: asyncio.Lock,
76
  ) -> web.StreamResponse:
 
 
 
 
 
 
 
 
 
 
 
 
77
  # Try to acquire the lock and sets the lock_acquired flag. Only the stream that acquires the lock should write to the response
78
  if lock.locked() == False:
79
  self.lock_acquired = await lock.acquire()
@@ -93,6 +158,18 @@ class AsyncResponseDataStreamer:
93
  return initiated_response
94
 
95
  async def stream(self, request: web.Request) -> ProcessedStreamResponse:
 
 
 
 
 
 
 
 
 
 
 
 
96
  try:
97
  start_time = time.time()
98
  client_response: web.Response = None
 
11
 
12
 
13
  class StreamChunk(BaseModel):
14
+ """
15
+ A model representing a chunk of streaming data.
16
+
17
+ Attributes:
18
+ delta (str): The change in the stream.
19
+ finish_reason (Optional[str]): The reason for finishing the stream.
20
+ accumulated_chunks (List[str]): List of accumulated chunks.
21
+ accumulated_chunks_timings (List[float]): Timings for the accumulated chunks.
22
+ timestamp (str): The timestamp of the chunk.
23
+ sequence_number (int): The sequence number of the chunk.
24
+ selected_uid (int): The selected user ID.
25
+ """
26
  delta: str
27
  finish_reason: Optional[str]
28
  accumulated_chunks: List[str]
 
32
  selected_uid: int
33
 
34
  def encode(self, encoding: str) -> bytes:
35
+ """
36
+ Encodes the StreamChunk instance to a JSON-formatted bytes object.
37
+
38
+ Args:
39
+ encoding (str): The encoding to use.
40
+
41
+ Returns:
42
+ bytes: The encoded JSON data.
43
+ """
44
  data = json.dumps(self.dict(), indent=4)
45
  return data.encode(encoding)
46
 
47
 
48
  class StreamError(BaseModel):
49
+ """
50
+ A model representing an error in the streaming data.
51
+
52
+ Attributes:
53
+ error (str): The error message.
54
+ timestamp (str): The timestamp of the error.
55
+ sequence_number (int): The sequence number at the time of error.
56
+ finish_reason (str): The reason for finishing the stream, defaults to "error".
57
+ """
58
  error: str
59
  timestamp: str
60
  sequence_number: int
 
69
 
70
 
71
  class AsyncResponseDataStreamer:
72
+ """
73
+ A class to manage asynchronous streaming of response data.
74
+
75
+ Attributes:
76
+ async_iterator (AsyncIterator): An asynchronous iterator for streaming data.
77
+ selected_uid (int): The selected user ID.
78
+ lock (asyncio.Lock): An asyncio lock to ensure exclusive access.
79
+ delay (float): Delay between processing chunks, defaults to 0.1 seconds.
80
+ accumulated_chunks (List[str]): List of accumulated chunks.
81
+ accumulated_chunks_timings (List[float]): Timings for the accumulated chunks.
82
+ finish_reason (str): The reason for finishing the stream.
83
+ sequence_number (int): The sequence number of the stream.
84
+ lock_acquired (bool): Flag indicating if the lock was acquired.
85
+ """
86
  def __init__(
87
  self,
88
  async_iterator: AsyncIterator,
 
103
  def ensure_response_is_created(
104
  self, initiated_response: web.StreamResponse
105
  ) -> web.StreamResponse:
106
+ """
107
+ Ensures that a StreamResponse is created if it does not already exist.
108
+
109
+ Args:
110
+ initiated_response (web.StreamResponse): The initiated response.
111
+
112
+ Returns:
113
+ web.StreamResponse: The ensured response.
114
+ """
115
  # Creates response if it was not created
116
  if initiated_response == None:
117
  initiated_response = web_response.StreamResponse(status=200, reason="OK")
 
127
  stream_chunk: StreamChunk,
128
  lock: asyncio.Lock,
129
  ) -> web.StreamResponse:
130
+ """
131
+ Writes a stream chunk to the response if the lock is acquired.
132
+
133
+ Args:
134
+ request (web.Request): The web request object.
135
+ initiated_response (web.StreamResponse): The initiated response.
136
+ stream_chunk (StreamChunk): The chunk of stream data to write.
137
+ lock (asyncio.Lock): The lock to ensure exclusive access.
138
+
139
+ Returns:
140
+ web.StreamResponse: The response with the written chunk.
141
+ """
142
  # Try to acquire the lock and sets the lock_acquired flag. Only the stream that acquires the lock should write to the response
143
  if lock.locked() == False:
144
  self.lock_acquired = await lock.acquire()
 
158
  return initiated_response
159
 
160
  async def stream(self, request: web.Request) -> ProcessedStreamResponse:
161
+ """
162
+ Streams data from the async iterator and writes it to the response.
163
+
164
+ Args:
165
+ request (web.Request): The web request object.
166
+
167
+ Returns:
168
+ ProcessedStreamResponse: The final processed stream response.
169
+
170
+ Raises:
171
+ ValueError: If the stream does not return a valid synapse.
172
+ """
173
  try:
174
  start_time = time.time()
175
  client_response: web.Response = None