from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import sqlite3
import os
import pytesseract
from PIL import Image
from pdf2image import convert_from_path
from groq import Groq
import json
import logging

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# --- Configuration ---
DATABASE = "medidoc.db"
UPLOAD_FOLDER = "uploads"
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# --- Groq Client Initialization ---
# The API key is read from the environment only. A credential must never be
# embedded in the source as a fallback value: anything committed to version
# control has to be treated as leaked and rotated.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    # Fail fast with an actionable message instead of starting a service
    # whose every LLM request would be rejected.
    raise RuntimeError(
        "GROQ_API_KEY environment variable is not set; "
        "export it before starting the MediDoc API"
    )
client = Groq(api_key=GROQ_API_KEY)

# --- Database Setup ---
def init_db():
    """Create the ``documents`` table if it does not already exist.

    Opens the SQLite file named by ``DATABASE``, runs a
    ``CREATE TABLE IF NOT EXISTS`` statement and commits it. Any failure is
    logged and swallowed, so importing this module never fails because of
    the database.
    """
    try:
        conn = sqlite3.connect(DATABASE)
        try:
            conn.execute("""
            CREATE TABLE IF NOT EXISTS documents (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                filename TEXT NOT NULL,
                category TEXT,
                document_date TEXT,
                doctor_name TEXT,
                hospital_name TEXT,
                summary TEXT,
                content TEXT
            )
            """)
            conn.commit()
        finally:
            # Release the handle even when the DDL or commit raises.
            conn.close()
        logger.info("Database initialized successfully")
    except Exception as e:
        logger.error(f"Database initialization failed: {e}")

init_db()

# --- FastAPI App ---
app = FastAPI(title="MediDoc API", version="1.0.0")

# Add CORS middleware.
# Allowed origins come from the comma-separated CORS_ALLOW_ORIGINS environment
# variable and default to "*" (any origin). Credentialed cross-origin requests
# are enabled only when an explicit origin list is configured: a wildcard
# origin must not be combined with allow_credentials=True.
_cors_origins = [
    origin.strip()
    for origin in os.getenv("CORS_ALLOW_ORIGINS", "*").split(",")
    if origin.strip()
] or ["*"]
app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_origins,
    allow_credentials="*" not in _cors_origins,
    allow_methods=["*"],
    allow_headers=["*"],
)

# --- Helper Functions ---
def extract_text_from_file(filepath: str) -> str:
    """Run OCR over a PDF or image file and return the recognised text.

    A path ending in ``.pdf`` (case-insensitive) is rendered with
    ``convert_from_path`` and every page is passed to Tesseract; any other
    path is opened with Pillow and OCR'd as a single image.

    Args:
        filepath: Location of the file on disk.

    Returns:
        The extracted text with surrounding whitespace stripped, or an empty
        string when the file does not exist or any step raises.
    """
    try:
        if not os.path.exists(filepath):
            logger.error(f"File not found: {filepath}")
            return ""

        is_pdf = filepath.lower().endswith(".pdf")
        if not is_pdf:
            # Anything that is not a PDF is treated as one image.
            with Image.open(filepath) as image:
                recognised = pytesseract.image_to_string(image)
            return recognised.strip()

        # One OCR pass per rendered page, each page followed by a newline.
        rendered_pages = convert_from_path(filepath)
        page_texts = [
            pytesseract.image_to_string(rendered) + "\n"
            for rendered in rendered_pages
        ]
        return "".join(page_texts).strip()

    except Exception as ocr_error:
        logger.error(f"Error extracting text from {filepath}: {ocr_error}")
        return ""

def _extract_json_object(raw: str) -> str:
    """Return the outermost ``{...}`` span of *raw*, or *raw* stripped if none."""
    start = raw.find("{")
    end = raw.rfind("}")
    if start != -1 and end > start:
        # Drops markdown fences and any prose the model put around the JSON.
        return raw[start:end + 1]
    return raw.strip()


def _coerce_llm_field(value) -> str:
    """Turn one extracted field into the plain string stored in the database."""
    if value is None:
        return "N/A"
    if isinstance(value, str):
        return value.strip() or "N/A"
    # Numbers, lists or nested objects are serialised rather than handed to
    # SQLite as parameter types it cannot bind.
    return json.dumps(value, ensure_ascii=False)


def process_with_llm(text: str) -> dict:
    """Analyze medical text using Groq's Llama model.

    Sends at most the first 2000 characters of *text* to the chat completion
    API and parses the reply as a JSON object.

    Args:
        text: OCR output of a medical document.

    Returns:
        A dict that always contains the string-valued keys ``category``,
        ``document_date``, ``doctor_name``, ``hospital_name`` and ``summary``.
        Blank input yields an "Empty Document" record; an API failure or an
        unparseable reply yields a generic fallback record. Extra keys the
        model returns are passed through unchanged.
    """
    if not text.strip():
        return {
            "category": "Empty Document",
            "document_date": "N/A",
            "doctor_name": "N/A",
            "hospital_name": "N/A",
            "summary": "Document appears to be empty or text could not be extracted.",
        }
    
    system_prompt = """
    You are an expert medical data extraction assistant. Analyze the provided text from a medical document and extract key information.
    Respond ONLY with a valid JSON object containing exactly these keys:
    - "category": Choose from "Prescription", "Lab Report", "Medical Bill", "Pharmacy Bill", "Discharge Summary", "Consultation Notes", "Other"
    - "document_date": Date in YYYY-MM-DD format. If not found, use "N/A"
    - "doctor_name": Full name of the doctor. If not found, use "N/A"
    - "hospital_name": Name of hospital/clinic. If not found, use "N/A"
    - "summary": A brief, clear summary in 1-2 sentences describing what this document is about

    Return only the JSON object, no other text.
    """
    
    fallback_response = {
        "category": "Other",
        "document_date": "N/A",
        "doctor_name": "N/A",
        "hospital_name": "N/A",
        "summary": "Medical document processed but specific information could not be extracted.",
    }

    # Stage 1: call the model. Only API problems are reported as such.
    try:
        completion = client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Medical document text:\n\n{text[:2000]}"}  # Limit text length
            ],
            temperature=0.1,
            max_tokens=300,
            top_p=1,
            stream=False,
        )
        # Guard against a reply that carries no message body.
        response_content = (completion.choices[0].message.content or "").strip()
    except Exception as e:
        logger.error(f"Error with Groq API: {e}")
        return fallback_response

    # Stage 2: parse the reply.
    try:
        parsed_response = json.loads(_extract_json_object(response_content))
    except json.JSONDecodeError as e:
        logger.error(f"JSON Parsing Error: {e}\nRaw Response: {response_content}")
        return fallback_response

    if not isinstance(parsed_response, dict):
        logger.error(
            "JSON Parsing Error: reply is not a JSON object\n"
            f"Raw Response: {response_content}"
        )
        return fallback_response

    # Fill in missing keys and normalise every required value to a string.
    required_keys = ("category", "document_date", "doctor_name", "hospital_name", "summary")
    for key in required_keys:
        parsed_response[key] = _coerce_llm_field(parsed_response.get(key))

    return parsed_response

# --- API Endpoints ---
# Service root: answers with a fixed JSON message so callers can see the API is up.
@app.get("/")
async def root():
    status_payload = {"message": "MediDoc API is running"}
    return status_payload

def _insert_document_record(filename: str, info: dict, content: str) -> None:
    """Insert one processed document row, always releasing the connection."""
    conn = sqlite3.connect(DATABASE)
    try:
        conn.execute(
            """INSERT INTO documents
               (filename, category, document_date, doctor_name, hospital_name, summary, content)
               VALUES (?, ?, ?, ?, ?, ?, ?)""",
            (
                filename,
                info.get("category", "N/A"),
                info.get("document_date", "N/A"),
                info.get("doctor_name", "N/A"),
                info.get("hospital_name", "N/A"),
                info.get("summary", "N/A"),
                content,
            ),
        )
        conn.commit()
    finally:
        conn.close()


@app.post("/upload/")
async def upload_document(file: UploadFile = File(...)):
    """Upload and process a medical document"""
    # Flow: validate the content type and filename, store the bytes under
    # UPLOAD_FOLDER, OCR the file, extract metadata with the LLM and insert
    # one row into the documents table.
    #
    # Returns {"filename", "info", "status"} on success.
    # Raises HTTPException 400 for an unsupported type, an unusable filename,
    # an empty upload or a file without extractable text, and 500 for any
    # other failure.
    #
    # The docstring above is kept short on purpose: FastAPI publishes it as
    # the operation description in the generated API docs.
    from fastapi.concurrency import run_in_threadpool

    try:
        # Validate file type
        allowed_types = ['application/pdf', 'image/jpeg', 'image/jpg', 'image/png']
        if file.content_type not in allowed_types:
            raise HTTPException(status_code=400, detail="Only PDF and image files are allowed")

        # The filename is client-controlled: keep only its final component so
        # values such as "../../x" or an absolute path cannot escape
        # UPLOAD_FOLDER. Backslashes are normalised for Windows-style paths.
        safe_name = os.path.basename((file.filename or "").replace("\\", "/"))
        if safe_name in ("", ".", ".."):
            raise HTTPException(status_code=400, detail="Uploaded file has no valid filename")

        # Read before opening the target so an empty upload leaves no file behind.
        content = await file.read()
        if not content:
            raise HTTPException(status_code=400, detail="Uploaded file is empty")

        # Save uploaded file
        filepath = os.path.join(UPLOAD_FOLDER, safe_name)
        with open(filepath, "wb") as buffer:
            buffer.write(content)

        logger.info(f"File saved: {filepath}")

        # OCR, the LLM request and the database insert are blocking calls;
        # run them in the thread pool so the event loop stays responsive.
        text = await run_in_threadpool(extract_text_from_file, filepath)
        if not text.strip():
            # Clean up the file
            os.remove(filepath)
            raise HTTPException(status_code=400, detail="Could not extract text from the uploaded file")

        # Process with LLM
        processed_data = await run_in_threadpool(process_with_llm, text)

        # Save to database
        await run_in_threadpool(_insert_document_record, safe_name, processed_data, text)

        logger.info(f"Document processed successfully: {safe_name}")
        return {"filename": safe_name, "info": processed_data, "status": "success"}

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Unexpected error processing file: {e}")
        raise HTTPException(status_code=500, detail="Internal server error occurred while processing the file")

@app.get("/documents/")
def get_documents():
    """Retrieve all processed documents"""
    # Returns {"documents": [...], "count": n}. Each entry carries the id,
    # filename and extracted metadata, without the OCR content. Rows whose
    # document_date is 'N/A' are listed last; the rest are ordered by
    # document_date, newest first.
    # Raises HTTPException 500 when the query fails.
    try:
        conn = sqlite3.connect(DATABASE)
        try:
            conn.row_factory = sqlite3.Row
            cursor = conn.execute("""
                SELECT id, filename, category, document_date, doctor_name, hospital_name, summary 
                FROM documents 
                ORDER BY 
                    CASE WHEN document_date = 'N/A' THEN 1 ELSE 0 END,
                    document_date DESC
            """)
            documents = [dict(row) for row in cursor.fetchall()]
        finally:
            # Close the handle on the error path as well.
            conn.close()
        return {"documents": documents, "count": len(documents)}
    except Exception as e:
        logger.error(f"Error retrieving documents: {e}")
        raise HTTPException(status_code=500, detail="Could not retrieve documents") from e

# Response body of the /search/ endpoint (used as its response_model).
class SearchResult(BaseModel):
    # Answer text returned to the caller.
    answer: str
    # Documents referenced by the answer; the search endpoint fills this with
    # dicts holding "filename", "summary" and "category", or leaves it empty.
    sources: list

@app.get("/search/", response_model=SearchResult)
def search_medical_history(query: str):
    """Search through medical documents using natural language"""
    # Loads every stored document, passes up to 1500 characters of each to the
    # LLM as context and returns its answer together with the documents whose
    # filename the answer mentions.
    # Raises HTTPException 400 for a blank query and 500 when the database or
    # the LLM call fails.
    if not query.strip():
        raise HTTPException(status_code=400, detail="Search query cannot be empty")
    
    try:
        conn = sqlite3.connect(DATABASE)
        try:
            all_docs = conn.execute(
                "SELECT filename, content, summary, category FROM documents"
            ).fetchall()
        finally:
            # Close the handle on the error path as well.
            conn.close()

        if not all_docs:
            return {"answer": "No documents have been uploaded yet. Please upload some medical documents first.", "sources": []}

        # Prepare context for the AI. ``content`` is a nullable column, so a
        # NULL value is treated as empty text instead of failing on the slice.
        context = "\n\n---\n\n".join(
            f"Document {position}: {filename}\nCategory: {category}\nSummary: {summary}\nContent: {(content or '')[:1500]}"
            for position, (filename, content, summary, category) in enumerate(all_docs, start=1)
        )
        
        system_prompt = f"""
        You are a medical assistant helping a patient understand their medical history. 
        Answer the user's question based ONLY on the provided medical documents.
        
        Guidelines:
        - Provide a clear, helpful answer
        - Mention specific document names when referencing information
        - If information is not available in the documents, say so clearly
        - Be concise but informative
        - Use medical terminology appropriately but explain complex terms
        
        Available Documents:
        {context}
        """

        completion = client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": query}
            ],
            temperature=0.2,
            max_tokens=800,
        )
        
        answer = completion.choices[0].message.content
        if not answer:
            # Reported through the generic handler below as a 500.
            raise RuntimeError("LLM returned an empty answer")
        
        # Find relevant sources mentioned in the answer. The lowered answer is
        # computed once, and empty filenames are skipped because an empty
        # string is a substring of every answer.
        answer_lower = answer.lower()
        sources = [
            {"filename": filename, "summary": summary, "category": category}
            for filename, _content, summary, category in all_docs
            if filename and filename.lower() in answer_lower
        ]
        
        return {"answer": answer, "sources": sources}
        
    except Exception as e:
        logger.error(f"Error during search: {e}")
        raise HTTPException(status_code=500, detail="Search service is currently unavailable") from e

@app.get("/health")
def health_check():
    """Health check endpoint"""
    # Probes the documents table so the "connected" status reflects a real
    # query rather than a constant.
    # Raises HTTPException 503 when the database cannot be queried.
    try:
        conn = sqlite3.connect(DATABASE)
        try:
            conn.execute("SELECT 1 FROM documents LIMIT 1").fetchone()
        finally:
            conn.close()
    except sqlite3.Error as e:
        logger.error(f"Health check failed: {e}")
        raise HTTPException(status_code=503, detail="Database is unavailable") from e
    return {"status": "healthy", "database": "connected"}

if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn on all interfaces, port 8000.
    from uvicorn import run as serve

    serve(app, host="0.0.0.0", port=8000)