"""Gradio chat app: NER model chat with Prometheus metrics and resource monitoring.

Serves three tabs (chat, performance metrics, resource usage), exports
Prometheus metrics on port 8000, and logs the conversation to chat_log.txt.
"""
import logging
import random
import threading
import time
from queue import Queue

import gradio as gr
import psutil
from prometheus_client import Counter, Histogram, start_http_server
from transformers import pipeline

# Load the model once at startup (downloads weights on first run).
ner_pipeline = pipeline("ner", model="Sevixdd/roberta-base-finetuned-ner")

# --- Prometheus Metrics Setup ---
REQUEST_COUNT = Counter('gradio_request_count', 'Total number of requests')
REQUEST_LATENCY = Histogram('gradio_request_latency_seconds', 'Request latency in seconds')

# --- Logging Setup ---
logging.basicConfig(filename="chat_log.txt", level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')

# --- Queue ---
# NOTE(review): messages are enqueued here but never consumed anywhere in
# this file — presumably a hook for an external worker; kept for compatibility.
chat_queue = Queue()


# --- Chat Function with Monitoring ---
def chat_function(message, history):
    """Run NER over `message`, log the exchange, and return the model output.

    Instrumented with Prometheus request count and latency. `history` is the
    Gradio chat history (unused here).
    """
    with REQUEST_LATENCY.time():
        REQUEST_COUNT.inc()
        try:
            chat_queue.put(message)
            logging.info("User: %s", message)

            # Run the NER pipeline on the incoming message.
            ner_result = ner_pipeline(message)
            response = f"Response from NER model: {ner_result}"
            logging.info("Bot: %s", response)

            time.sleep(random.uniform(0.5, 2.5))  # Simulate processing time
            return response
        except Exception:
            # logging.exception keeps the traceback, unlike logging.error.
            logging.exception("Error in chat processing")
            return "An error occurred. Please try again."


def get_metrics():
    """Return (request count, avg latency in seconds) from the Prometheus collectors.

    The original code read Histogram samples[0], which is a bucket counter,
    not an average; the mean latency is `_sum / _count`.
    """
    count_samples = {s.name: s.value
                     for metric in REQUEST_COUNT.collect() for s in metric.samples}
    request_count = count_samples.get('gradio_request_count_total', 0)

    hist_samples = {s.name: s.value
                    for metric in REQUEST_LATENCY.collect() for s in metric.samples}
    total = hist_samples.get('gradio_request_latency_seconds_sum', 0.0)
    n = hist_samples.get('gradio_request_latency_seconds_count', 0)
    avg_latency = round(total / n, 2) if n else 0.0
    return request_count, avg_latency


def get_usage():
    """Return (CPU %, memory %) for the resource-usage tab."""
    return psutil.cpu_percent(), psutil.virtual_memory().percent


# --- Gradio Interface with Three Windows ---
with gr.Blocks(title="PLOD Filtered with Monitoring") as demo:
    with gr.Tab("Chat"):
        gr.Markdown("## Chat with the Bot")
        chatbot = gr.ChatInterface(fn=chat_function)

    with gr.Tab("Performance Metrics"):
        with gr.Row():
            request_count_display = gr.Textbox(label="Request Count")
            avg_latency_display = gr.Textbox(label="Avg. Response Time (s)")
        # Calling component.update() from a background thread does not refresh
        # the UI in Gradio; poll with an `every=` load event instead.
        demo.load(get_metrics,
                  outputs=[request_count_display, avg_latency_display],
                  every=5)  # Update every 5 seconds

    with gr.Tab("Resource Usage"):
        with gr.Row():
            cpu_usage_display = gr.Textbox(label="CPU Usage (%)")
            mem_usage_display = gr.Textbox(label="Memory Usage (%)")
        demo.load(get_usage,
                  outputs=[cpu_usage_display, mem_usage_display],
                  every=5)  # Update every 5 seconds

# --- Start Prometheus exporter ---
threading.Thread(target=start_http_server, args=(8000,), daemon=True).start()


# --- Simulate Chat Interactions ---
def simulate_interactions():
    """Drive a few canned messages through chat_function at random intervals."""
    messages = ["Hello bot!", "What's your name?", "Tell me a joke.", "Who are you?"]
    for msg in messages:
        chat_function(msg, [])
        time.sleep(random.uniform(1, 5))  # Random interval between messages


threading.Thread(target=simulate_interactions, daemon=True).start()  # Start simulation

# Launch the app
demo.launch(share=True)