MinerU

Paused

App Files Files Community

SkyNait committed on Mar 5

Commit

8cf3fe8

1 Parent(s): f81cfef

rabbitmq test

Browse files

Files changed (9) hide show

__pycache__/inference_svm_model.cpython-310.pyc +0 -0
__pycache__/mineru_single.cpython-310.pyc +0 -0
__pycache__/table_row_extraction.cpython-310.pyc +0 -0
__pycache__/topic_extraction.cpython-310.pyc +0 -0
__pycache__/worker.cpython-310.pyc +0 -0
test_listener.py +73 -24
topic_extraction.log +0 -0
topic_extraction.py +2 -2
worker.py +9 -3

__pycache__/inference_svm_model.cpython-310.pyc CHANGED Viewed

Binary files a/__pycache__/inference_svm_model.cpython-310.pyc and b/__pycache__/inference_svm_model.cpython-310.pyc differ

__pycache__/mineru_single.cpython-310.pyc CHANGED Viewed

Binary files a/__pycache__/mineru_single.cpython-310.pyc and b/__pycache__/mineru_single.cpython-310.pyc differ

__pycache__/table_row_extraction.cpython-310.pyc CHANGED Viewed

Binary files a/__pycache__/table_row_extraction.cpython-310.pyc and b/__pycache__/table_row_extraction.cpython-310.pyc differ

__pycache__/topic_extraction.cpython-310.pyc CHANGED Viewed

Binary files a/__pycache__/topic_extraction.cpython-310.pyc and b/__pycache__/topic_extraction.cpython-310.pyc differ

__pycache__/worker.cpython-310.pyc CHANGED Viewed

Binary files a/__pycache__/worker.cpython-310.pyc and b/__pycache__/worker.cpython-310.pyc differ

test_listener.py CHANGED Viewed

@@ -1,33 +1,82 @@
 import pika
 import os
 import dotenv
 import time
-dotenv.load_dotenv()
-params = pika.URLParameters(os.getenv("RABBITMQ_URL"))
-params.heartbeat = 5
-params.blocked_connection_timeout = 2
-params.connection_attempts = 3
-params.retry_delay = 5
-connection = pika.BlockingConnection(params)
-channel = connection.channel()
-channel.queue_declare(queue="web_server", durable=True)
-def callback(ch, method, properties, body):
-    try:
-        print(f"Received message: {body}")
-        print(f"Properties: {properties}")
-        print(f"Method: {method}")
-        print(f"Channel: {ch}")
-        time.sleep(10)
-    except Exception as e:
-        print(f"Error: {e}")
-channel.basic_consume(queue="web_server", on_message_callback=callback, auto_ack=True)
-print("Waiting for messages...")
-channel.start_consuming()

 import pika
 import os
+import json
 import dotenv
+import threading
 import time
+dotenv.load_dotenv()
+RABBITMQ_URL = os.getenv("RABBITMQ_URL")
+def send_topic_extraction_request(payload: dict):
+    """Publish a topic extraction request to the ``gpu_server`` queue.
+
+    Simulates ml_server sending work to gpu_server: opens a blocking
+    connection to ``RABBITMQ_URL``, declares the durable ``gpu_server``
+    queue, publishes ``payload`` as UTF-8 encoded JSON through the default
+    exchange, and closes the connection.
+
+    Args:
+        payload: JSON-serializable request body.
+    """
+    params = pika.URLParameters(RABBITMQ_URL)
+    params.heartbeat = 5
+    params.blocked_connection_timeout = 2
+    connection = pika.BlockingConnection(params)
+    # Release the connection even when declaring or publishing raises.
+    try:
+        channel = connection.channel()
+        channel.queue_declare(queue="gpu_server", durable=True)
+        message = json.dumps(payload).encode("utf-8")
+        channel.basic_publish(
+            exchange="",
+            routing_key="gpu_server",
+            body=message,
+            # delivery_mode=2 asks the broker to persist the message.
+            properties=pika.BasicProperties(delivery_mode=2),
+        )
+        print("Topic extraction request sent from ml_server to gpu_server.")
+    finally:
+        # Closing an already-closed connection raises and would mask the
+        # original error, so only close while it is still open.
+        if connection.is_open:
+            connection.close()
+def listen_for_results():
+    """Consume topic extraction results from the ``ml_server`` queue.
+
+    Simulates ml_server waiting for gpu_server's answer: opens a blocking
+    connection to ``RABBITMQ_URL``, declares the durable ``ml_server`` queue
+    and blocks in ``start_consuming`` until interrupted. Each message is
+    parsed as JSON, pretty-printed and acknowledged.
+    """
+    params = pika.URLParameters(RABBITMQ_URL)
+    params.heartbeat = 5
+    params.blocked_connection_timeout = 2
+    connection = pika.BlockingConnection(params)
+    channel = connection.channel()
+    channel.queue_declare(queue="ml_server", durable=True)
+
+    def callback(ch, method, properties, body):
+        """Print one result message and ack it; reject bodies that are not JSON."""
+        try:
+            result = json.loads(body)
+        except ValueError as e:
+            # A malformed body will never parse; requeueing it would make the
+            # broker redeliver the same poison message in an endless loop.
+            print("Error processing message:", e)
+            ch.basic_nack(delivery_tag=method.delivery_tag, requeue=False)
+            return
+        print("Received topic extraction result:")
+        print(json.dumps(result, indent=2))
+        ch.basic_ack(delivery_tag=method.delivery_tag)
+
+    channel.basic_consume(queue="ml_server", on_message_callback=callback)
+    print("Listening for topic extraction results on ml_server queue...")
+    try:
+        channel.start_consuming()
+    except KeyboardInterrupt:
+        # Ctrl+C is the normal way to stop this script; leave the loop cleanly.
+        channel.stop_consuming()
+    finally:
+        if connection.is_open:
+            connection.close()
+if __name__ == "__main__":
+    # Sample request: one specification PDF and one topic to extract from it.
+    input_file = {
+        "key": "file1",
+        "url": "https://www.ocr.org.uk/Images/168982-specification-gcse-mathematics.pdf",
+        "type": "specification",
+    }
+    topic = {"title": "Sample Topic", "id": 1}
+    payload = {
+        "pattern": "topic_extraction",
+        "data": {"input_files": [input_file], "topics": [topic]},
+    }
+    # Publish the request from a background thread, playing the ml_server role.
+    producer_thread = threading.Thread(
+        target=send_topic_extraction_request,
+        args=(payload,),
+    )
+    producer_thread.start()
+    # Brief pause so the request can reach the worker before we start listening.
+    time.sleep(1)
+    # Block on the ml_server queue and print whatever results come back.
+    listen_for_results()

topic_extraction.log CHANGED Viewed

The diff for this file is too large to render. See raw diff

topic_extraction.py CHANGED Viewed

@@ -979,9 +979,9 @@ class MineruNoTextProcessor:
 if __name__ == "__main__":
     input_pdf = "/home/user/app/input_output/wjec-gce-as-a-economics-specification-from-2015.pdf"
     output_dir = "/home/user/app/pearson_json"
-    gemini_key = os.getenv("GEMINI_API_KEY", "AIzaSyDtoakpXa2pjJwcQB6TJ5QaXHNSA5JxcrU")
     try:
-        processor = MineruNoTextProcessor(output_folder=output_dir, gemini_api_key=gemini_key)
         result = processor.process(input_pdf)
         logger.info("Processing completed successfully.")
     except Exception as e:

 if __name__ == "__main__":
     input_pdf = "/home/user/app/input_output/wjec-gce-as-a-economics-specification-from-2015.pdf"
     output_dir = "/home/user/app/pearson_json"
+    api_key = os.getenv("GEMINI_API_KEY", "AIzaSyDtoakpXa2pjJwcQB6TJ5QaXHNSA5JxcrU")
     try:
+        processor = MineruNoTextProcessor(output_folder=output_dir, gemini_api_key=api_key)
         result = processor.process(input_pdf)
         logger.info("Processing completed successfully.")
     except Exception as e:

worker.py CHANGED Viewed

@@ -29,7 +29,7 @@ class RabbitMQWorker:
         self.topic_processor = MineruNoTextProcessor(
             output_folder="/tmp/topic_extraction_outputs",
-            gemini_api_key=os.getenv("GEMINI_API_KEY")
         )
         self.publisher_connection = None
@@ -140,7 +140,8 @@ class RabbitMQWorker:
                         pdf_url = file.get("url")
                         logger.info("[Worker %s] Processing topic extraction for URL: %s", thread_id, pdf_url)
-                        result = self.topic_processor.process(pdf_url)
                         context = {
                             "key": file.get("key", ""),
                             "body": result
@@ -215,4 +216,9 @@ class RabbitMQWorker:
 def main():
     worker = RabbitMQWorker()
-    worker.start()

         self.topic_processor = MineruNoTextProcessor(
             output_folder="/tmp/topic_extraction_outputs",
+            api_key = os.getenv("GEMINI_API_KEY", "AIzaSyDtoakpXa2pjJwcQB6TJ5QaXHNSA5JxcrU")
         )
         self.publisher_connection = None
                         pdf_url = file.get("url")
                         logger.info("[Worker %s] Processing topic extraction for URL: %s", thread_id, pdf_url)
+                        # result = self.topic_processor.process(pdf_url)
+                        result = self.topic_processor.process(pdf_url, inputs={"api_key": os.getenv("GEMINI_API_KEY")})
                         context = {
                             "key": file.get("key", ""),
                             "body": result
 def main():
+    """Create a RabbitMQWorker and start it."""
     worker = RabbitMQWorker()
+    worker.start()
+if __name__ == "__main__":
+    main()
+__all__ = ['main']