Create aimessage.py

aimessage.py
ADDED

import os
import time
import logging
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed
from confluent_kafka import KafkaException, TopicPartition, Producer, Consumer
from confluent_kafka.schema_registry.avro import AvroDeserializer, AvroSerializer
from confluent_kafka.serialization import MessageField, SerializationContext
from aitask import handle_message, TooManyRequestsError
from schemaregistry import SchemaClient
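
# The two local imports above are not shown in this file; their assumed
# contracts, inferred from how they are used below:
#   - aitask.handle_message(decoded_msg, producer, ensure_producer_connected, serializer)
#     processes one decoded record and may raise TooManyRequestsError.
#   - aitask.TooManyRequestsError carries a retry_after attribute (seconds).
#   - schemaregistry.SchemaClient(url, subject) exposes get_schema_str() and a
#     schema_registry_client attribute.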

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Configuration
schema_registry_url = os.getenv("SCHEMA_REGISTRY_URL")
kafka_domain = os.getenv('DOMAIN')
password = os.getenv('PASSWORD')

conf = {
    'bootstrap.servers': f"{kafka_domain}:29092",
    'security.protocol': 'SASL_PLAINTEXT',
    'sasl.mechanism': 'PLAIN',
    'sasl.username': "dathuynh",
    'sasl.password': password,
}
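
# Expected environment (inferred from the os.getenv calls above; the values
# here are placeholders, not real endpoints), e.g.:
#   export SCHEMA_REGISTRY_URL=http://schema-registry:8081
#   export DOMAIN=kafka.example.com
#   export PASSWORD=...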

# Shutdown flag
shutdown_event = threading.Event()

def avro_deserializer():
    schema_client = SchemaClient(schema_registry_url, "cybersentinal.avro.scan")
    schema_str = schema_client.get_schema_str()

    if schema_str is None:
        raise RuntimeError("Failed to fetch schema for subject cybersentinal.avro.scan")
    schema_registry_client = schema_client.schema_registry_client
    return AvroDeserializer(schema_registry_client, schema_str)

def avro_serializer():
    schema_client = SchemaClient(schema_registry_url, "cybersentinal.avro.scandetail")
    schema_str = schema_client.get_schema_str()

    if schema_str is None:
        raise RuntimeError("Failed to fetch schema for subject cybersentinal.avro.scandetail")
    schema_registry_client = schema_client.schema_registry_client
    return AvroSerializer(schema_registry_client, schema_str)

def create_consumer(group_id):
    consumer_conf = {
        **conf,
        'group.id': group_id,
        'auto.offset.reset': 'latest',
        'session.timeout.ms': 60000,
        'heartbeat.interval.ms': 3000,
        'enable.auto.commit': False,  # Manual commit after each processed batch
        'log_level': 4
    }
    return Consumer(consumer_conf)


def create_producer():
    producer_conf = {
        **conf,
        'linger.ms': 10,
        'batch.num.messages': 1000,
        'queue.buffering.max.ms': 1000
    }

    return Producer(producer_conf)

# Create producer instance
producer = create_producer()

def ensure_producer_connected(producer):
    retries = 5
    for attempt in range(retries):
        try:
            producer.list_topics(timeout=5)
            break
        except KafkaException as e:
            if attempt < retries - 1:
                logger.warning(f"Producer connection check failed (attempt {attempt + 1}/{retries}): {e}. Retrying in 5s...")
                time.sleep(5)
            else:
                logger.error("Max retries reached. Could not establish a producer connection.")
                raise
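
# Illustrative sketch only (an assumption: aitask's handle_message is not shown
# in this repo). It presumably publishes results via producer.produce(); the
# standard confluent-kafka delivery callback for that idiom looks like this:
def example_delivery_report(err, msg):
    if err is not None:
        logger.error(f"Delivery failed for {msg.topic()}: {err}")
    else:
        logger.info(f"Delivered to {msg.topic()} [{msg.partition()}] at offset {msg.offset()}")
# e.g. producer.produce(topic, value=payload, callback=example_delivery_report)
# followed by producer.poll(0) to serve delivery callbacks.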

def decode_message(msg, avro_deserializer, topic):
    try:
        byte_message = msg.value()
        return avro_deserializer(byte_message, SerializationContext(topic, MessageField.VALUE))
    except Exception as e:
        logger.error(f"Error decoding message: {e}")
        return None

def kafka_consumer(group_id, topic):
    consumer = create_consumer(group_id)
    consumer.subscribe([topic])
    deserializer = avro_deserializer()
    serializer = avro_serializer()
    logger.info(f"Consumer {group_id} is running. Waiting for messages on topic {topic}...")

    with ThreadPoolExecutor(max_workers=10) as executor:
        shutdown_timer = threading.Timer(14400, shutdown_event.set)  # Shut down after 4 hours
        shutdown_timer.start()
        try:
            while not shutdown_event.is_set():
                try:
                    msgs = consumer.consume(num_messages=10, timeout=1.0)
                    if not msgs:
                        continue
                    # Decode each message exactly once and keep the future ->
                    # message pairing so a failure can be traced back to its
                    # partition.
                    future_to_msg = {}
                    for msg in msgs:
                        decoded = decode_message(msg, deserializer, topic)
                        if decoded is None:
                            continue
                        future = executor.submit(
                            handle_message,
                            decoded,
                            producer,
                            ensure_producer_connected,
                            serializer
                        )
                        future_to_msg[future] = msg
                    for future in as_completed(future_to_msg):
                        try:
                            future.result()
                        except TooManyRequestsError as e:
                            partition = future_to_msg[future].partition()
                            consumer.pause([TopicPartition(topic, partition)])
                            logger.info(f"Paused partition {partition} due to TooManyRequestsError")
                            handle_retry(consumer, topic, partition, e.retry_after)
                        except Exception as e:
                            logger.error(f"Error processing message: {e}")
                            raise
                    # enable.auto.commit is off, so commit the batch explicitly.
                    consumer.commit(asynchronous=False)
                except KafkaException as e:
                    logger.error(f"Kafka exception: {e}. Restarting consumer loop...")
                    time.sleep(5)
                except KeyboardInterrupt:
                    logger.info("Consumer interrupted. Exiting...")
                    shutdown_event.set()
        finally:
            shutdown_timer.cancel()
            consumer.close()

def handle_retry(consumer, topic, partition, retry_after):
    # Blocks the consumer thread for the advised backoff, then resumes the
    # paused partition.
    time.sleep(retry_after)
    consumer.resume([TopicPartition(topic, partition)])

def start_kafka_consumer_thread(group_id, topic):
    consumer_thread = threading.Thread(target=kafka_consumer, args=(group_id, topic))
    consumer_thread.daemon = True
    consumer_thread.start()
    return consumer_thread
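
# Minimal usage sketch (assumed entry point; the group id and topic below are
# placeholders, not names taken from this repo).
if __name__ == "__main__":
    thread = start_kafka_consumer_thread("ai-message-group", "cybersentinal.avro.scan")
    try:
        # The consumer thread is a daemon, so keep the main thread alive.
        while thread.is_alive():
            thread.join(timeout=1.0)
    except KeyboardInterrupt:
        shutdown_event.set()
        thread.join(timeout=5.0)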