SkyNait committed on
Commit
bab6bde
·
verified ·
1 Parent(s): 1116a38

Add logging

Browse files
Files changed (1) hide show
  1. worker.py +28 -12
worker.py CHANGED
@@ -10,11 +10,19 @@ from typing import Tuple, Dict, Any
10
 
11
  from mineru_single import Processor
12
 
 
 
 
 
 
 
 
13
 
14
  class RabbitMQWorker:
15
  def __init__(self, num_workers: int = 1):
16
  self.num_workers = num_workers
17
  self.rabbit_url = os.getenv("RABBITMQ_URL")
 
18
  self.processor = Processor()
19
 
20
  self.publisher_connection = None
@@ -23,6 +31,7 @@ class RabbitMQWorker:
23
 
24
  def setup_publisher(self):
25
  if not self.publisher_connection or self.publisher_connection.is_closed:
 
26
  connection_params = pika.URLParameters(self.rabbit_url)
27
  connection_params.heartbeat = 1000 # Match the consumer heartbeat
28
  connection_params.blocked_connection_timeout = 500 # Increase timeout
@@ -30,6 +39,7 @@ class RabbitMQWorker:
30
  self.publisher_connection = pika.BlockingConnection(connection_params)
31
  self.publisher_channel = self.publisher_connection.channel()
32
  self.publisher_channel.queue_declare(queue="ml_server", durable=True)
 
33
 
34
  def publish_message(self, body_dict: dict, headers: dict):
35
  """Use persistent connection for publishing"""
@@ -48,9 +58,10 @@ class RabbitMQWorker:
48
  headers=headers
49
  )
50
  )
 
51
  return True
52
  except Exception as e:
53
- print(f"Publish attempt {attempt + 1} failed: {e}")
54
  # Close failed connection
55
  if self.publisher_connection and not self.publisher_connection.is_closed:
56
  try:
@@ -61,7 +72,7 @@ class RabbitMQWorker:
61
  self.publisher_channel = None
62
 
63
  if attempt == max_retries - 1:
64
- print(f"Failed to publish after {max_retries} attempts")
65
  return False
66
  time.sleep(2)
67
 
@@ -70,7 +81,7 @@ class RabbitMQWorker:
70
  thread_id = threading.current_thread().name
71
  headers = properties.headers or {}
72
 
73
- print(f"[Worker {thread_id}] Received message: {body}, headers: {headers}")
74
 
75
  try:
76
  if headers.get("request_type") == "process_files":
@@ -81,6 +92,7 @@ class RabbitMQWorker:
81
  # Process files
82
  for file in body_dict.get("input_files", []):
83
  try:
 
84
  context = {"key": file["key"], "body": self.processor.process(file["url"], file["key"])}
85
  contexts.append(context)
86
  except Exception as e:
@@ -94,18 +106,18 @@ class RabbitMQWorker:
94
 
95
  # Publish results
96
  if self.publish_message(body_dict, headers):
97
- print(f"[Worker {thread_id}] Successfully published results")
98
  else:
99
  ch.basic_nack(delivery_tag=method.delivery_tag, requeue=True)
100
- print(f"[Worker {thread_id}] Failed to publish results")
101
 
102
- print(f"[Worker {thread_id}] Contexts: {contexts}")
103
  else:
104
  ch.basic_ack(delivery_tag=method.delivery_tag)
105
- print(f"[Worker {thread_id}] Unknown process")
106
 
107
  except Exception as e:
108
- print(f"Error in callback: {e}")
109
  ch.basic_nack(delivery_tag=method.delivery_tag, requeue=True)
110
 
111
  def connect_to_rabbitmq(self):
@@ -113,6 +125,8 @@ class RabbitMQWorker:
113
  connection_params = pika.URLParameters(self.rabbit_url)
114
  connection_params.heartbeat = 1000
115
  connection_params.blocked_connection_timeout = 500
 
 
116
 
117
  connection = pika.BlockingConnection(connection_params)
118
  channel = connection.channel()
@@ -123,21 +137,23 @@ class RabbitMQWorker:
123
  queue="gpu_server",
124
  on_message_callback=self.callback
125
  )
 
126
  return connection, channel
127
 
128
  def worker(self, channel):
129
  """Worker function"""
130
- print(f"Worker started")
131
  try:
132
  channel.start_consuming()
133
  except Exception as e:
134
- print(f"Worker stopped: {e}")
135
  finally:
 
136
  channel.close()
137
 
138
  def start(self):
139
  """Start the worker threads"""
140
- print(f"Starting {self.num_workers} workers")
141
  while True:
142
  try:
143
  with ThreadPoolExecutor(max_workers=self.num_workers) as executor:
@@ -145,7 +161,7 @@ class RabbitMQWorker:
145
  connection, channel = self.connect_to_rabbitmq()
146
  executor.submit(self.worker, channel)
147
  except Exception as e:
148
- print(f"Connection lost, reconnecting... Error: {e}")
149
  time.sleep(5) # Wait before reconnecting
150
 
151
  def main():
 
10
 
11
  from mineru_single import Processor
12
 
13
+ import logging
14
+
15
+ logging.basicConfig(
16
+ level=logging.INFO,
17
+ format="%(asctime)s [%(levelname)s] %(name)s - %(message)s"
18
+ )
19
+ logger = logging.getLogger(__name__)
20
 
21
  class RabbitMQWorker:
22
  def __init__(self, num_workers: int = 1):
23
  self.num_workers = num_workers
24
  self.rabbit_url = os.getenv("RABBITMQ_URL")
25
+ logger.info("Initializing RabbitMQWorker")
26
  self.processor = Processor()
27
 
28
  self.publisher_connection = None
 
31
 
32
  def setup_publisher(self):
33
  if not self.publisher_connection or self.publisher_connection.is_closed:
34
+ logger.info("Setting up publisher connection to RabbitMQ.")
35
  connection_params = pika.URLParameters(self.rabbit_url)
36
  connection_params.heartbeat = 1000 # Match the consumer heartbeat
37
  connection_params.blocked_connection_timeout = 500 # Increase timeout
 
39
  self.publisher_connection = pika.BlockingConnection(connection_params)
40
  self.publisher_channel = self.publisher_connection.channel()
41
  self.publisher_channel.queue_declare(queue="ml_server", durable=True)
42
+ logger.info("Publisher connection/channel established successfully.")
43
 
44
  def publish_message(self, body_dict: dict, headers: dict):
45
  """Use persistent connection for publishing"""
 
58
  headers=headers
59
  )
60
  )
61
+ logger.info("Published message to ml_server queue (attempt=%d).", attempt + 1)
62
  return True
63
  except Exception as e:
64
+ logger.error("Publish attempt %d failed: %s", attempt + 1, e)
65
  # Close failed connection
66
  if self.publisher_connection and not self.publisher_connection.is_closed:
67
  try:
 
72
  self.publisher_channel = None
73
 
74
  if attempt == max_retries - 1:
75
+ logger.error("Failed to publish after %d attempts", max_retries)
76
  return False
77
  time.sleep(2)
78
 
 
81
  thread_id = threading.current_thread().name
82
  headers = properties.headers or {}
83
 
84
+ logger.info("[Worker %s] Received message: %s, headers: %s", thread_id, body, headers)
85
 
86
  try:
87
  if headers.get("request_type") == "process_files":
 
92
  # Process files
93
  for file in body_dict.get("input_files", []):
94
  try:
95
+ logger.info("[Worker %s] Processing %d input files.", thread_id, len(file))
96
  context = {"key": file["key"], "body": self.processor.process(file["url"], file["key"])}
97
  contexts.append(context)
98
  except Exception as e:
 
106
 
107
  # Publish results
108
  if self.publish_message(body_dict, headers):
109
+ logger.info("[Worker %s] Successfully published results to ml_server.", thread_id)
110
  else:
111
  ch.basic_nack(delivery_tag=method.delivery_tag, requeue=True)
112
+ logger.error("[Worker %s] Failed to publish results.", thread_id)
113
 
114
+ logger.info("[Worker %s] Contexts: %s", thread_id, contexts)
115
  else:
116
  ch.basic_ack(delivery_tag=method.delivery_tag)
117
+ logger.warning("[Worker %s] Unknown process type in headers: %s", thread_id, headers.get("request_type"))
118
 
119
  except Exception as e:
120
+ logger.error("Error in callback: %s", e)
121
  ch.basic_nack(delivery_tag=method.delivery_tag, requeue=True)
122
 
123
  def connect_to_rabbitmq(self):
 
125
  connection_params = pika.URLParameters(self.rabbit_url)
126
  connection_params.heartbeat = 1000
127
  connection_params.blocked_connection_timeout = 500
128
+ logger.info("Connecting to RabbitMQ for consumer with heartbeat=1000.")
129
+
130
 
131
  connection = pika.BlockingConnection(connection_params)
132
  channel = connection.channel()
 
137
  queue="gpu_server",
138
  on_message_callback=self.callback
139
  )
140
+ logger.info("Consumer connected. Listening on queue='gpu_server'...")
141
  return connection, channel
142
 
143
  def worker(self, channel):
144
  """Worker function"""
145
+ logger.info("Worker thread started. Beginning consuming...")
146
  try:
147
  channel.start_consuming()
148
  except Exception as e:
149
+ logger.error("Worker thread encountered an error: %s", e)
150
  finally:
151
+ logger.info("Worker thread shutting down. Closing channel.")
152
  channel.close()
153
 
154
  def start(self):
155
  """Start the worker threads"""
156
+ logger.info("Starting %d workers in a ThreadPoolExecutor.", self.num_workers)
157
  while True:
158
  try:
159
  with ThreadPoolExecutor(max_workers=self.num_workers) as executor:
 
161
  connection, channel = self.connect_to_rabbitmq()
162
  executor.submit(self.worker, channel)
163
  except Exception as e:
164
+ logger.error("Connection lost, reconnecting... Error: %s", e)
165
  time.sleep(5) # Wait before reconnecting
166
 
167
  def main():