"""Gradio chat app: NER model chat with Prometheus metrics and resource monitoring.

Serves three tabs (chat, performance metrics, resource usage), exports
Prometheus metrics on port 8000, and logs the conversation to chat_log.txt.
"""
import logging
import random
import threading
import time
from queue import Queue

import gradio as gr
import psutil
from prometheus_client import Counter, Histogram, start_http_server
from transformers import pipeline

# Load the model once at startup (downloads weights on first run).
ner_pipeline = pipeline("ner", model="Sevixdd/roberta-base-finetuned-ner")

# --- Prometheus Metrics Setup ---
REQUEST_COUNT = Counter('gradio_request_count', 'Total number of requests')
REQUEST_LATENCY = Histogram('gradio_request_latency_seconds', 'Request latency in seconds')

# --- Logging Setup ---
logging.basicConfig(filename="chat_log.txt", level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')

# --- Queue ---
# NOTE(review): messages are enqueued here but never consumed anywhere in
# this file — presumably a hook for an external worker; kept for compatibility.
chat_queue = Queue()


# --- Chat Function with Monitoring ---
def chat_function(message, history):
    """Run NER over `message`, log the exchange, and return the model output.

    Instrumented with Prometheus request count and latency. `history` is the
    Gradio chat history (unused here).
    """
    with REQUEST_LATENCY.time():
        REQUEST_COUNT.inc()
        try:
            chat_queue.put(message)
            logging.info("User: %s", message)

            # Run the NER pipeline on the incoming message.
            ner_result = ner_pipeline(message)
            response = f"Response from NER model: {ner_result}"
            logging.info("Bot: %s", response)

            time.sleep(random.uniform(0.5, 2.5))  # Simulate processing time
            return response
        except Exception:
            # logging.exception keeps the traceback, unlike logging.error.
            logging.exception("Error in chat processing")
            return "An error occurred. Please try again."


def get_metrics():
    """Return (request count, avg latency in seconds) from the Prometheus collectors.

    The original code read Histogram samples[0], which is a bucket counter,
    not an average; the mean latency is `_sum / _count`.
    """
    count_samples = {s.name: s.value
                     for metric in REQUEST_COUNT.collect() for s in metric.samples}
    request_count = count_samples.get('gradio_request_count_total', 0)

    hist_samples = {s.name: s.value
                    for metric in REQUEST_LATENCY.collect() for s in metric.samples}
    total = hist_samples.get('gradio_request_latency_seconds_sum', 0.0)
    n = hist_samples.get('gradio_request_latency_seconds_count', 0)
    avg_latency = round(total / n, 2) if n else 0.0
    return request_count, avg_latency


def get_usage():
    """Return (CPU %, memory %) for the resource-usage tab."""
    return psutil.cpu_percent(), psutil.virtual_memory().percent


# --- Gradio Interface with Three Windows ---
with gr.Blocks(title="PLOD Filtered with Monitoring") as demo:
    with gr.Tab("Chat"):
        gr.Markdown("## Chat with the Bot")
        chatbot = gr.ChatInterface(fn=chat_function)

    with gr.Tab("Performance Metrics"):
        with gr.Row():
            request_count_display = gr.Textbox(label="Request Count")
            avg_latency_display = gr.Textbox(label="Avg. Response Time (s)")
        # Calling component.update() from a background thread does not refresh
        # the UI in Gradio; poll with an `every=` load event instead.
        demo.load(get_metrics,
                  outputs=[request_count_display, avg_latency_display],
                  every=5)  # Update every 5 seconds

    with gr.Tab("Resource Usage"):
        with gr.Row():
            cpu_usage_display = gr.Textbox(label="CPU Usage (%)")
            mem_usage_display = gr.Textbox(label="Memory Usage (%)")
        demo.load(get_usage,
                  outputs=[cpu_usage_display, mem_usage_display],
                  every=5)  # Update every 5 seconds

# --- Start Prometheus exporter ---
threading.Thread(target=start_http_server, args=(8000,), daemon=True).start()


# --- Simulate Chat Interactions ---
def simulate_interactions():
    """Drive a few canned messages through chat_function at random intervals."""
    messages = ["Hello bot!", "What's your name?", "Tell me a joke.", "Who are you?"]
    for msg in messages:
        chat_function(msg, [])
        time.sleep(random.uniform(1, 5))  # Random interval between messages


threading.Thread(target=simulate_interactions, daemon=True).start()  # Start simulation

# Launch the app
demo.launch(share=True)