worker update
Browse files
__pycache__/inference_svm_model.cpython-310.pyc
ADDED
Binary file (960 Bytes). View file
|
|
__pycache__/mineru_single.cpython-310.pyc
ADDED
Binary file (4.24 kB). View file
|
|
__pycache__/worker.cpython-310.pyc
ADDED
Binary file (4.78 kB). View file
|
|
worker.py
CHANGED
@@ -10,71 +10,12 @@ from typing import Tuple, Dict, Any
|
|
10 |
|
11 |
from mineru_single import Processor
|
12 |
|
13 |
-
class MessageProcessor:
|
14 |
-
def __init__(self):
|
15 |
-
self.processor = Processor()
|
16 |
-
|
17 |
-
def process_message(self, body_bytes: bytes) -> Tuple[str, Dict[str, Any]]:
|
18 |
-
"""Process incoming message and return processed results"""
|
19 |
-
body_str = body_bytes.decode("utf-8")
|
20 |
-
data = json.loads(body_str)
|
21 |
-
|
22 |
-
headers = data.get("headers", {})
|
23 |
-
request_type = headers.get("request_type", "")
|
24 |
-
request_id = headers.get("request_id", "")
|
25 |
-
body = data.get("body", {})
|
26 |
-
|
27 |
-
if request_type != "process_files":
|
28 |
-
return "No processing done", data
|
29 |
-
|
30 |
-
input_files = body.get("input_files", [])
|
31 |
-
topics = body.get("topics", [])
|
32 |
-
|
33 |
-
urls, file_key_map = self._extract_urls_and_keys(input_files)
|
34 |
-
batch_results = self.processor.process_batch(urls)
|
35 |
-
md_context = self._create_markdown_context(batch_results, file_key_map)
|
36 |
-
|
37 |
-
final_json = self._create_response_json(request_id, input_files, topics, md_context)
|
38 |
-
return json.dumps(final_json, ensure_ascii=False), final_json
|
39 |
-
|
40 |
-
def _extract_urls_and_keys(self, input_files: list) -> Tuple[list, dict]:
|
41 |
-
"""Extract URLs and create file key mapping"""
|
42 |
-
urls = []
|
43 |
-
file_key_map = {}
|
44 |
-
for f in input_files:
|
45 |
-
key = f.get("key", "")
|
46 |
-
url = f.get("url", "")
|
47 |
-
urls.append(url)
|
48 |
-
file_key_map[url] = key
|
49 |
-
return urls, file_key_map
|
50 |
-
|
51 |
-
def _create_markdown_context(self, batch_results: dict, file_key_map: dict) -> list:
|
52 |
-
"""Create markdown context from batch results"""
|
53 |
-
md_context = []
|
54 |
-
for url, md_content in batch_results.items():
|
55 |
-
key = file_key_map.get(url, "")
|
56 |
-
md_context.append({"key": key, "body": md_content})
|
57 |
-
return md_context
|
58 |
-
|
59 |
-
def _create_response_json(self, request_id: str, input_files: list,
|
60 |
-
topics: list, md_context: list) -> dict:
|
61 |
-
"""Create the final response JSON"""
|
62 |
-
return {
|
63 |
-
"headers": {
|
64 |
-
"request_type": "question_extraction_update_from_gpu_server",
|
65 |
-
"request_id": request_id
|
66 |
-
},
|
67 |
-
"body": {
|
68 |
-
"input_files": input_files,
|
69 |
-
"topics": topics,
|
70 |
-
"md_context": md_context
|
71 |
-
}
|
72 |
-
}
|
73 |
|
74 |
class RabbitMQWorker:
|
75 |
def __init__(self, num_workers: int = 1):
|
76 |
self.num_workers = num_workers
|
77 |
-
self.
|
|
|
78 |
|
79 |
def callback(self, ch, method, properties, body):
|
80 |
"""Handle incoming RabbitMQ messages"""
|
@@ -83,13 +24,25 @@ class RabbitMQWorker:
|
|
83 |
|
84 |
print(f"[Worker {thread_id}] Received message: {body}, headers: {headers}")
|
85 |
|
86 |
-
if headers.get("
|
87 |
-
|
88 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
else:
|
90 |
-
print(f"[Worker {thread_id}] Unknown process
|
91 |
-
|
92 |
-
print("[Worker] Done")
|
93 |
|
94 |
def worker(self, channel):
|
95 |
"""Worker process to consume messages"""
|
|
|
10 |
|
11 |
from mineru_single import Processor
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
class RabbitMQWorker:
|
15 |
def __init__(self, num_workers: int = 1):
|
16 |
self.num_workers = num_workers
|
17 |
+
self.public_connection = pika.BlockingConnection(pika.URLParameters(os.getenv("RABBITMQ_URL", "amqp://guest:guest@localhost:5672/")))
|
18 |
+
self.public_channel = self.public_connection.channel()
|
19 |
|
20 |
def callback(self, ch, method, properties, body):
|
21 |
"""Handle incoming RabbitMQ messages"""
|
|
|
24 |
|
25 |
print(f"[Worker {thread_id}] Received message: {body}, headers: {headers}")
|
26 |
|
27 |
+
if headers.get("request_type") == "process_files":
|
28 |
+
contexts = []
|
29 |
+
body_dict = json.loads(body)
|
30 |
+
for file in body_dict.get("input_files", []):
|
31 |
+
contexts.append({"key": file["key"], "body": self.message_processor.process_message(file["url"])})
|
32 |
+
body_dict["md_context"] = contexts
|
33 |
+
json_body = json.dumps(body_dict)
|
34 |
+
self.public_channel.queue_declare(queue="ml_server", durable=True)
|
35 |
+
self.public_channel.basic_publish(
|
36 |
+
exchange="",
|
37 |
+
routing_key="ml_server",
|
38 |
+
body=json_body,
|
39 |
+
properties=pika.BasicProperties(headers=headers)
|
40 |
+
)
|
41 |
+
print(f"[Worker {thread_id}] Contexts: {contexts}")
|
42 |
+
|
43 |
else:
|
44 |
+
print(f"[Worker {thread_id}] Unknown process")
|
45 |
+
return
|
|
|
46 |
|
47 |
def worker(self, channel):
|
48 |
"""Worker process to consume messages"""
|