import os
import logging

import gradio as gr
from transformers import AutoModel, AutoTokenizer
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# ------------------------------
# 🔹 Load Bioformer-8L Model
# ------------------------------
# Bioformer-8L is an encoder-only (BERT-style) model, so it is loaded with
# AutoModel rather than AutoModelForCausalLM. It is not used for generation;
# it is loaded here for the knowledge-base retrieval/embedding role described
# in the system prompt below.
BIOFORMER_MODEL = "bioformers/bioformer-8L"
bioformer_tokenizer = AutoTokenizer.from_pretrained(BIOFORMER_MODEL)
bioformer_model = AutoModel.from_pretrained(BIOFORMER_MODEL)

# ------------------------------
# 🔹 Load DeepSeek-R1-Distill-Qwen-7B-GGUF Model
# ------------------------------
DEEPSEEK_REPO = "lmstudio-community/DeepSeek-R1-Distill-Qwen-7B-GGUF"
DEEPSEEK_FILENAME = "DeepSeek-R1-Distill-Qwen-7B-Q4_0.gguf"

model_path = hf_hub_download(repo_id=DEEPSEEK_REPO, filename=DEEPSEEK_FILENAME)

llm = Llama(
    model_path=model_path,
    n_ctx=4096,
    n_gpu_layers=0,   # CPU inference
    logits_all=True,  # kept from the original config; not required for plain generation
    n_batch=256,
)

logger.info("Models loaded successfully.")

# ------------------------------
# 🔹 Unified Medical Prompt
# ------------------------------
UNIFIED_MEDICAL_PROMPT = """
You are an advanced Medical AI Assistant capable of providing thorough,
comprehensive answers for a wide range of medical specialties: General Practice,
Radiology, Cardiology, Neurology, Psychiatry, Pediatrics, Endocrinology,
Oncology, and more.

You can:
1) Analyze images if provided (Radiology).
2) Retrieve relevant documents from a knowledge base (Vector Store).
3) Provide scientific, evidence-based explanations and references when possible.

Always strive to provide a detailed, helpful, and empathetic response.
"""

# ------------------------------
# 🔹 Chat Function
# ------------------------------
def chat_with_agent(user_query, image_file=None):
    """Stream a response from the DeepSeek model for a medical query."""
    # The GGUF model is text-only, so an uploaded image cannot be analyzed
    # directly; it is only acknowledged in the prompt.
    image_note = ""
    if image_file:
        image_note = (
            f'\nNote: The patient attached a medical image '
            f'("{os.path.basename(image_file)}").'
        )

    combined_context = f"""
{UNIFIED_MEDICAL_PROMPT}

Patient Query: "{user_query}"{image_note}

Your Response:
"""

    # Stream tokens from the DeepSeek-R1-Distill model, yielding the
    # accumulated text so Gradio updates the output box incrementally.
    response_accumulator = ""
    for token in llm(
        prompt=combined_context,
        max_tokens=1024,
        temperature=0.7,
        top_p=0.9,
        stream=True,
    ):
        partial_text = token["choices"][0]["text"]
        response_accumulator += partial_text
        yield response_accumulator

# ------------------------------
# 🔹 Gradio Interface
# ------------------------------
with gr.Blocks(title="🏥 DeepSeek-Med AI Assistant") as demo:
    gr.Markdown("""
    # 🏥 DeepSeek-Med AI Assistant
    _Your intelligent medical assistant powered by advanced AI._
    """)
    with gr.Row():
        user_input = gr.Textbox(
            label="💬 Ask a medical question",
            placeholder="Type your question here...",
        )
        image_file = gr.Image(label="📷 Upload Medical Image (Optional)", type="filepath")
    submit_btn = gr.Button("🚀 Submit", variant="primary")
    output_text = gr.Textbox(label="📝 Assistant's Response", interactive=False, lines=25)

    submit_btn.click(fn=chat_with_agent, inputs=[user_input, image_file], outputs=output_text)

if __name__ == "__main__":
    demo.queue().launch(server_name="0.0.0.0", server_port=7860)
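# ------------------------------
# 🔹 Running Locally
# ------------------------------
# A minimal setup sketch (package names are the public PyPI names for the
# imports above; exact versions are not pinned by this script, and the
# filename "app.py" is an assumption):
#   pip install gradio transformers torch llama-cpp-python huggingface_hub
#   python app.py
# The UI is then served at http://localhost:7860. With n_gpu_layers=0 the
# 7B model runs entirely on CPU, so first-token latency can be significant.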