import streamlit as st
import os
import tempfile
from pathlib import Path
import time
from typing import List, Dict, Tuple
import pandas as pd
from streamlit.runtime.uploaded_file_manager import UploadedFile
from anthropic import Anthropic
import pymongo
from dotenv import load_dotenv
import fitz  # PyMuPDF
import voyageai
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import ServerlessSpec
from pinecone import Index

# Load environment variables
load_dotenv()

# VoyageAI constants
VOYAGEAI_BATCH_SIZE = 128
VOYAGEAI_VECTOR_DIM = 512

# Pinecone index name
PINECONE_ID = "intratalent-v2"

# Initialize MongoDB client
MONGO_URI = os.getenv('MONGO_URI')
mongo_client = pymongo.MongoClient(MONGO_URI)
db = mongo_client['intratalent']
resume_collection = db['resumes']

# Initialize Anthropic client
anthropic = Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY'))

# Initialize Streamlit app
st.set_page_config(
    page_title="IntraTalent Resume Processor",
    page_icon="📄",
    layout="wide"
)


def extract_text_from_pdf(pdf_content: bytes) -> str:
    """Extract text from PDF content."""
    try:
        # Create a temporary file to store the PDF content
        with tempfile.NamedTemporaryFile(mode='w+b', suffix='.pdf', delete=False) as temp_file:
            temp_file.write(pdf_content)
            temp_file_path = temp_file.name

        # Extract text from PDF
        doc = fitz.open(temp_file_path)
        text = ""
        for page_num in range(doc.page_count):
            page = doc.load_page(page_num)
            text += page.get_text() + "\n"
        doc.close()

        # Clean up temporary file
        os.unlink(temp_file_path)

        return text
    except Exception as e:
        st.error(f"Error extracting text from PDF: {e}")
        return ""


def extract_info_with_claude(resume_text: str) -> str:
    """Extract information from resume text using Claude."""
    st.write("🤖 Sending request to Claude API...")
    prompt = """
    Extract the following information from the given resume:
    1. Full Name
    2. List of all experiences with their descriptions (copy exactly from resume)

    Please format the output as follows:
    Name: [Full Name]
    Projects:
    1. [Experience/Project Name]: [Experience/Project Description]
    2. [Experience/Project Name]: [Experience/Project Description]
    ...

    Extract all experiences, including projects, leadership, work experience, research, etc.
    Don't include hyphens and put the entire description on one line.

    Here's the resume text:
    {resume_text}
    """.format(resume_text=resume_text)

    try:
        message = anthropic.messages.create(
            model="claude-3-haiku-20240307",
            max_tokens=4096,
            system="You are a helpful assistant that extracts information from resumes.",
            messages=[{
                "role": "user",
                "content": prompt
            }]
        )
        extracted_info = message.content[0].text
        st.write("✅ Received response from Claude:")
        st.code(extracted_info, language="text")
    except Exception as e:
        extracted_info = f"An error occurred: {e}"
        st.error(f"❌ API Error: {e}")

    return extracted_info
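
# For reference, Claude's reply is expected to follow the shape the prompt
# above requests; parse_resume() below depends on this layout. The names and
# descriptions here are illustrative, not from a real resume:
#
#   Name: Jane Doe
#   Projects:
#   1. Chess Engine: Built a chess engine in C++ with alpha-beta pruning
#   2. Research Assistant: Studied distributed training of language models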

def get_pinecone_index(database_id: str) -> Index:
    """Connect to the Pinecone index, creating it first if it doesn't exist."""
    pc = Pinecone(api_key=os.getenv('PINECONE_API_KEY'))

    # list_indexes() returns index descriptions, so compare against the names
    if database_id not in pc.list_indexes().names():
        pc.create_index(
            database_id,
            dimension=VOYAGEAI_VECTOR_DIM,
            spec=ServerlessSpec(
                cloud='aws',
                region='us-east-1'
            ),
            metric='cosine'
        )

    # Connect to the index and return the handle
    return pc.Index(database_id)


def add_to_voyage(person_name: str, person_projects: List[Dict]) -> None:
    """Embed each project description with VoyageAI and upsert it to Pinecone."""
    embeds = []
    metas = []
    ids = []

    index = get_pinecone_index(PINECONE_ID)
    vo = voyageai.Client(api_key=os.getenv('VOYAGEAI_API_KEY'))

    for i, project in enumerate(person_projects):
        # Embed the description (the embed API takes a list of texts)
        embed = vo.embed(
            texts=[project["description"]],
            model='voyage-3-lite',
            truncation=False
        ).embeddings[0]
        embeds.append(embed)

        # Pinecone metadata must be a dict; keep the person and project names
        metas.append({"person": person_name, "project": project["name"]})

        # Vector IDs must be unique strings, so qualify them with the name
        ids.append(f"{person_name}-{i}")

    # Create the list of (id, vector, metadata) tuples to be upserted
    to_upsert = list(zip(ids, embeds, metas))
    for i in range(0, len(ids), VOYAGEAI_BATCH_SIZE):
        i_end = min(i + VOYAGEAI_BATCH_SIZE, len(ids))
        index.upsert(vectors=to_upsert[i:i_end])

    # View the index statistics
    st.write(index.describe_index_stats())


def parse_resume(uploaded_file: UploadedFile) -> Tuple[str, List[Dict]]:
    """Parse a resume file and return name and projects."""
    try:
        st.write(f"📝 Processing resume: {uploaded_file.name}")
        resume_content = uploaded_file.getvalue()

        st.write("📊 Extracting text from PDF...")
        resume_text = extract_text_from_pdf(resume_content)
        st.write("📄 Extracted text from PDF:")
        st.code(resume_text)

        extracted_info = extract_info_with_claude(resume_text)
        st.write("🔍 Parsing extracted information...")

        # Parse the extracted information
        lines = extracted_info.split('\n')
        name = lines[0].split(': ')[1] if len(lines) > 0 and ': ' in lines[0] else "Unknown"
        st.write(f"👤 Extracted name: {name}")

        projects = []
        project_started = False
        for line in lines:
            if line.strip() == "Projects:":
                project_started = True
                continue
            if project_started and line.strip():
                project_parts = line.split(': ', 1)
                if len(project_parts) == 2:
                    project_name = project_parts[0].split('. ', 1)[-1]  # Remove the leading number
                    project_description = project_parts[1]
                    projects.append({"name": project_name, "description": project_description})

        st.write("📋 Extracted projects:")
        st.json(projects)

        # Store in MongoDB
        resume_data = {
            "name": name,
            "projects": projects,
            "full_content": resume_text
        }
        resume_collection.insert_one(resume_data)
        st.write("💾 Stored data in MongoDB")

        add_to_voyage(name, projects)
        st.write("💾 Stored embeddings in Pinecone")

        return name, projects
    except Exception as e:
        st.error(f"❌ Error processing resume: {e}")
        return "Unknown", []
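
# --- Similarity search sketch ---------------------------------------------
# main() below stubs out the search step ("Feature coming soon"). This is a
# minimal sketch of how that search could work against the index populated by
# add_to_voyage() above; the function name `search_projects` and the default
# `top_k` are illustrative choices, not part of the original script. It reuses
# the same Voyage model and the "person"/"project" metadata keys written at
# indexing time.
def search_projects(query: str, top_k: int = 5) -> List[Dict]:
    """Embed a free-text query and return the closest stored projects."""
    vo = voyageai.Client(api_key=os.getenv('VOYAGEAI_API_KEY'))
    index = get_pinecone_index(PINECONE_ID)

    # Embed the query with the same model used for the project descriptions
    query_embed = vo.embed(
        texts=[query],
        model='voyage-3-lite',
        truncation=False
    ).embeddings[0]

    # Nearest-neighbour lookup over the stored project vectors
    response = index.query(
        vector=query_embed,
        top_k=top_k,
        include_metadata=True
    )

    # Each match carries the metadata dict written by add_to_voyage()
    return [
        {
            "person": match.metadata.get("person", "Unknown"),
            "project": match.metadata.get("project", "Unknown"),
            "score": match.score,
        }
        for match in response.matches
    ]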

def process_resumes(uploaded_files: List[UploadedFile]) -> Dict:
    """Process multiple resumes and return results."""
    results = {}
    progress_bar = st.progress(0)

    for idx, file in enumerate(uploaded_files):
        st.write(f"\n---\n### Processing file {idx + 1} of {len(uploaded_files)}")

        if file.type != "application/pdf":
            st.warning(f"⚠️ Skipping {file.name}: Not a PDF file")
            continue

        try:
            name, projects = parse_resume(file)
            results[file.name] = {
                "name": name,
                "projects": projects
            }
            # Update progress
            progress_bar.progress((idx + 1) / len(uploaded_files))
            st.write(f"✅ Successfully processed {file.name}")
        except Exception as e:
            st.error(f"❌ Error processing {file.name}: {e}")

    return results


def display_results(results: Dict):
    """Display processed resume results in an organized manner."""
    if not results:
        return

    st.subheader("📊 Processed Resumes")

    for filename, data in results.items():
        with st.expander(f"📄 {data['name']} ({filename})"):
            st.write("🏷️ File details:")
            st.json({
                "filename": filename,
                "name": data['name'],
                "number_of_projects": len(data['projects'])
            })

            if data['projects']:
                st.write("📋 Projects:")
                df = pd.DataFrame(data['projects'])
                st.dataframe(
                    df,
                    column_config={
                        "name": "Project Name",
                        "description": "Description"
                    },
                    hide_index=True
                )
            else:
                st.info("ℹ️ No projects found in this resume")


def main():
    st.title("🎯 IntraTalent Resume Processor")

    # File uploader section
    st.header("📤 Upload Resumes")
    uploaded_files = st.file_uploader(
        "Upload up to 10 resumes (PDF only)",
        type=['pdf'],
        accept_multiple_files=True,
        key="resume_uploader"
    )

    # Validate number of files
    if uploaded_files and len(uploaded_files) > 10:
        st.error("⚠️ Maximum 10 files allowed. Please remove some files.")
        return

    # Process button
    if uploaded_files and st.button("🔄 Process Resumes"):
        with st.spinner("Processing resumes..."):
            st.write("🚀 Starting resume processing...")
            results = process_resumes(uploaded_files)
            st.session_state['processed_results'] = results
            st.write("✨ Processing complete!")
            display_results(results)

    # Query section
    st.header("🔍 Search Projects")
    query = st.text_area(
        "Enter your project requirements",
        placeholder="Example: Looking for team members with experience in machine learning and computer vision...",
        height=100
    )

    if query and st.button("🔎 Search"):
        if 'processed_results' not in st.session_state:
            st.warning("⚠️ Please process some resumes first!")
            return

        with st.spinner("Searching for matches..."):
            st.write("🔄 Preparing to search...")
            # Here you would implement the embedding and similarity search;
            # see the search_projects() sketch above for one possible approach
            st.success("✅ Search completed!")

            # Display results in a nice format
            st.subheader("🎯 Top Matches")
            # Placeholder for search results
            st.info("🔜 Feature coming soon: Will display matching projects and candidates based on similarity search")


if __name__ == "__main__":
    main()
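
# Running the app (a sketch; adjust the filename to wherever this script lives):
#   streamlit run app.py
#
# Expected environment variables (loaded from .env by load_dotenv above):
#   MONGO_URI          - MongoDB connection string
#   ANTHROPIC_API_KEY  - Anthropic API key for Claude
#   PINECONE_API_KEY   - Pinecone API key
#   VOYAGEAI_API_KEY   - VoyageAI API key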