SkyNait committed on
Commit
8cf3fe8
·
1 Parent(s): f81cfef

rabbitmq test

Browse files
__pycache__/inference_svm_model.cpython-310.pyc CHANGED
Binary files a/__pycache__/inference_svm_model.cpython-310.pyc and b/__pycache__/inference_svm_model.cpython-310.pyc differ
 
__pycache__/mineru_single.cpython-310.pyc CHANGED
Binary files a/__pycache__/mineru_single.cpython-310.pyc and b/__pycache__/mineru_single.cpython-310.pyc differ
 
__pycache__/table_row_extraction.cpython-310.pyc CHANGED
Binary files a/__pycache__/table_row_extraction.cpython-310.pyc and b/__pycache__/table_row_extraction.cpython-310.pyc differ
 
__pycache__/topic_extraction.cpython-310.pyc CHANGED
Binary files a/__pycache__/topic_extraction.cpython-310.pyc and b/__pycache__/topic_extraction.cpython-310.pyc differ
 
__pycache__/worker.cpython-310.pyc CHANGED
Binary files a/__pycache__/worker.cpython-310.pyc and b/__pycache__/worker.cpython-310.pyc differ
 
test_listener.py CHANGED
@@ -1,33 +1,82 @@
1
  import pika
2
  import os
 
3
  import dotenv
 
4
  import time
5
- dotenv.load_dotenv()
6
-
7
- params = pika.URLParameters(os.getenv("RABBITMQ_URL"))
8
- params.heartbeat = 5
9
- params.blocked_connection_timeout = 2
10
- params.connection_attempts = 3
11
- params.retry_delay = 5
12
 
13
- connection = pika.BlockingConnection(params)
14
-
15
- channel = connection.channel()
16
 
17
- channel.queue_declare(queue="web_server", durable=True)
18
 
19
- def callback(ch, method, properties, body):
20
- try:
21
- print(f"Received message: {body}")
22
- print(f"Properties: {properties}")
23
- print(f"Method: {method}")
24
- print(f"Channel: {ch}")
25
- time.sleep(10)
26
- except Exception as e:
27
- print(f"Error: {e}")
28
-
 
 
 
 
 
 
 
 
29
 
30
- channel.basic_consume(queue="web_server", on_message_callback=callback, auto_ack=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
- print("Waiting for messages...")
33
- channel.start_consuming()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import pika
2
  import os
3
+ import json
4
  import dotenv
5
+ import threading
6
  import time
 
 
 
 
 
 
 
7
 
8
+ dotenv.load_dotenv()
 
 
9
 
10
+ RABBITMQ_URL = os.getenv("RABBITMQ_URL")
11
 
12
def send_topic_extraction_request(payload: dict):
    """Simulate ml_server sending a topic extraction request to gpu_server.

    The payload is JSON-encoded and published through the default exchange
    to the durable ``gpu_server`` queue, using ``delivery_mode=2`` on the
    message properties.

    Args:
        payload: JSON-serializable request body.

    Raises:
        TypeError: If ``payload`` cannot be serialized to JSON.
        Exception: Any connection/channel error raised by pika is propagated
            after the connection has been closed.
    """
    # Serialize first so a bad payload fails before any network I/O happens.
    message = json.dumps(payload).encode("utf-8")

    params = pika.URLParameters(RABBITMQ_URL)
    params.heartbeat = 5
    params.blocked_connection_timeout = 2
    connection = pika.BlockingConnection(params)
    try:
        channel = connection.channel()
        channel.queue_declare(queue="gpu_server", durable=True)
        channel.basic_publish(
            exchange="",
            routing_key="gpu_server",
            body=message,
            properties=pika.BasicProperties(delivery_mode=2),
        )
        print("Topic extraction request sent from ml_server to gpu_server.")
    finally:
        # Always release the connection, even when declare/publish fails.
        # Skip close() on an already-dropped connection so the original
        # error is not masked by a wrong-state exception.
        if connection.is_open:
            connection.close()
30
 
31
def listen_for_results():
    """Simulate ml_server listening for topic extraction results on ml_server queue.

    Declares the durable ``ml_server`` queue and blocks in
    ``start_consuming()``, pretty-printing every JSON result received.
    Successfully parsed messages are acked; messages that are not valid
    JSON are rejected without requeue. Ctrl-C stops consuming, and the
    connection is closed on the way out.
    """
    params = pika.URLParameters(RABBITMQ_URL)
    params.heartbeat = 5
    params.blocked_connection_timeout = 2
    connection = pika.BlockingConnection(params)
    channel = connection.channel()
    channel.queue_declare(queue="ml_server", durable=True)

    def callback(ch, method, properties, body):
        """Print one result message and ack it, or reject it if malformed."""
        try:
            result = json.loads(body)
        except ValueError as e:
            # JSONDecodeError and UnicodeDecodeError are both ValueErrors.
            # A malformed body will never parse, so requeueing it would
            # make the broker redeliver the same message in a tight loop.
            print("Error processing message:", e)
            ch.basic_nack(delivery_tag=method.delivery_tag, requeue=False)
            return

        print("Received topic extraction result:")
        print(json.dumps(result, indent=2))
        ch.basic_ack(delivery_tag=method.delivery_tag)

    channel.basic_consume(queue="ml_server", on_message_callback=callback)
    print("Listening for topic extraction results on ml_server queue...")
    try:
        channel.start_consuming()
    except KeyboardInterrupt:
        # Let the operator stop the listener cleanly with Ctrl-C.
        channel.stop_consuming()
    finally:
        if connection.is_open:
            connection.close()
53
 
54
if __name__ == "__main__":
    # Sample request: one specification PDF and one topic to extract.
    payload = {
        "pattern": "topic_extraction",
        "data": {
            "input_files": [
                {
                    "key": "file1",
                    "url": "https://www.ocr.org.uk/Images/168982-specification-gcse-mathematics.pdf",
                    "type": "specification",
                }
            ],
            "topics": [
                {
                    "title": "Sample Topic",
                    "id": 1,
                }
            ],
        },
    }

    # Start the producer (ml_server sending the request) in a separate thread.
    producer_thread = threading.Thread(
        target=send_topic_extraction_request, args=(payload,)
    )
    producer_thread.start()

    # Wait for the publish attempt to finish rather than sleeping for a
    # fixed interval: a slow broker connection can take longer than any
    # hard-coded delay, and join() returns as soon as the producer is done.
    producer_thread.join()

    # Start listening for results on the ml_server queue.
    listen_for_results()
topic_extraction.log CHANGED
The diff for this file is too large to render. See raw diff
 
topic_extraction.py CHANGED
@@ -979,9 +979,9 @@ class MineruNoTextProcessor:
979
  if __name__ == "__main__":
980
  input_pdf = "/home/user/app/input_output/wjec-gce-as-a-economics-specification-from-2015.pdf"
981
  output_dir = "/home/user/app/pearson_json"
982
- gemini_key = os.getenv("GEMINI_API_KEY", "AIzaSyDtoakpXa2pjJwcQB6TJ5QaXHNSA5JxcrU")
983
  try:
984
- processor = MineruNoTextProcessor(output_folder=output_dir, gemini_api_key=gemini_key)
985
  result = processor.process(input_pdf)
986
  logger.info("Processing completed successfully.")
987
  except Exception as e:
 
979
  if __name__ == "__main__":
980
  input_pdf = "/home/user/app/input_output/wjec-gce-as-a-economics-specification-from-2015.pdf"
981
  output_dir = "/home/user/app/pearson_json"
982
+ api_key = os.getenv("GEMINI_API_KEY", "AIzaSyDtoakpXa2pjJwcQB6TJ5QaXHNSA5JxcrU")
983
  try:
984
+ processor = MineruNoTextProcessor(output_folder=output_dir, gemini_api_key=api_key)
985
  result = processor.process(input_pdf)
986
  logger.info("Processing completed successfully.")
987
  except Exception as e:
worker.py CHANGED
@@ -29,7 +29,7 @@ class RabbitMQWorker:
29
 
30
  self.topic_processor = MineruNoTextProcessor(
31
  output_folder="/tmp/topic_extraction_outputs",
32
- gemini_api_key=os.getenv("GEMINI_API_KEY")
33
  )
34
 
35
  self.publisher_connection = None
@@ -140,7 +140,8 @@ class RabbitMQWorker:
140
  pdf_url = file.get("url")
141
  logger.info("[Worker %s] Processing topic extraction for URL: %s", thread_id, pdf_url)
142
 
143
- result = self.topic_processor.process(pdf_url)
 
144
  context = {
145
  "key": file.get("key", ""),
146
  "body": result
@@ -215,4 +216,9 @@ class RabbitMQWorker:
215
 
216
  def main():
217
  worker = RabbitMQWorker()
218
- worker.start()
 
 
 
 
 
 
29
 
30
  self.topic_processor = MineruNoTextProcessor(
31
  output_folder="/tmp/topic_extraction_outputs",
32
+ api_key = os.getenv("GEMINI_API_KEY", "AIzaSyDtoakpXa2pjJwcQB6TJ5QaXHNSA5JxcrU")
33
  )
34
 
35
  self.publisher_connection = None
 
140
  pdf_url = file.get("url")
141
  logger.info("[Worker %s] Processing topic extraction for URL: %s", thread_id, pdf_url)
142
 
143
+ # result = self.topic_processor.process(pdf_url)
144
+ result = self.topic_processor.process(pdf_url, inputs={"api_key": os.getenv("GEMINI_API_KEY")})
145
  context = {
146
  "key": file.get("key", ""),
147
  "body": result
 
216
 
217
  def main():
218
  worker = RabbitMQWorker()
219
+ worker.start()
220
+
221
+ if __name__ == "__main__":
222
+ main()
223
+
224
+ __all__ = ['main']