import re

import gradio as gr
import pdfplumber
import pandas as pd
from huggingface_hub import login
from transformers import pipeline


def login_with_token(hf_token):
    """Log in to Hugging Face using the provided API token."""
    try:
        login(token=hf_token)
        return "Logged in successfully!"
    except Exception as e:
        return f"Error: {str(e)}"


# Load the Named Entity Recognition (NER) model once at startup.
# framework="pt" requires PyTorch to be installed.
# aggregation_strategy="simple" merges subword pieces (e.g. "Jo", "##hn")
# into whole words, so each entity arrives as a complete span instead of
# token fragments.
nlp = pipeline(
    "ner",
    model="dbmdz/bert-large-cased-finetuned-conll03-english",
    framework="pt",
    aggregation_strategy="simple",
)


def extract_text_from_pdf(pdf_file):
    """Extract text from a PDF file using pdfplumber."""
    # Gradio may hand us a plain path string or a tempfile-like object,
    # depending on the Gradio version; handle both.
    path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
    try:
        with pdfplumber.open(path) as pdf:
            text = ""
            for page in pdf.pages:
                text += page.extract_text() or ""  # Handle pages with no text
            return text
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return ""


def parse_resume(pdf_file):
    """Parse a resume PDF and extract relevant information."""
    resume_text = extract_text_from_pdf(pdf_file)
    if not resume_text.strip():
        print("No text found in PDF.")
        return {}

    # BERT accepts at most 512 tokens, so run NER over character chunks to
    # avoid errors on long resumes (a naive split that may cut an entity at
    # a chunk boundary).
    entities = []
    chunk_size = 1000
    for i in range(0, len(resume_text), chunk_size):
        entities.extend(nlp(resume_text[i:i + chunk_size]))

    name = education = skills = experience = None

    # Example parsing logic based on the aggregated NER output.
    for entity in entities:
        label = entity.get("entity_group", "")
        word = entity.get("word", "").strip()
        if label == "PER" and name is None:
            name = word  # Assume the first person mentioned is the candidate
        elif label == "ORG":
            experience = (experience or "") + word + ", "
        elif label == "MISC":
            skills = (skills or "") + word + ", "
    # The CoNLL-03 label set (PER/ORG/LOC/MISC) has no education class,
    # so that field is left empty here.

    # Emails and phone numbers are rarely tagged by NER models;
    # naive regexes over the raw text are more reliable.
    email_match = re.search(r"[\w.+-]+@[\w-]+\.[\w.-]+", resume_text)
    email = email_match.group(0) if email_match else None
    phone_match = re.search(r"\+?\d[\d\s().-]{7,}\d", resume_text)
    phone = phone_match.group(0) if phone_match else None

    # Clean up trailing separators.
    skills = skills.rstrip(", ") if skills else None
    experience = experience.rstrip(", ") if experience else None

    # Log the final parsed information.
    print(f"Parsed Info: Name={name}, Email={email}, Skills={skills}, Experience={experience}")

    return {
        "Name": name,
        "Email": email,
        "Phone": phone,
        "Education": education,
        "Skills": skills,
        "Experience": experience,
    }


def batch_process_resumes(pdf_files):
    """Process a batch of resume PDFs and write the results to an Excel file."""
    all_resumes = []
    for pdf_file in pdf_files:
        resume_info = parse_resume(pdf_file)
        # Only keep resumes that yielded at least one field.
        if any(resume_info.values()):
            all_resumes.append(resume_info)

    if not all_resumes:
        print("No valid resume information was parsed.")
        return None

    # Convert to a DataFrame and save as Excel
    # (writing .xlsx requires the openpyxl package).
    df = pd.DataFrame(all_resumes)
    output_file = "/tmp/parsed_resumes.xlsx"
    df.to_excel(output_file, index=False)

    # Return the path to the file for download.
    return output_file


# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("### AI Resume Parser")

    # User input for the Hugging Face token (masked on screen)
    hf_token_input = gr.Textbox(
        label="Hugging Face Token",
        placeholder="Enter your Hugging Face API token here",
        type="password",
    )
    # File input for resume PDFs
    file_input = gr.File(file_count="multiple", label="Upload Resumes (PDFs)")
    # Output for results
    output = gr.Textbox(label="Result")
    # File output for the download link
    download_link = gr.File(label="Download Excel File", file_count="single")
    # Process button that triggers the login and resume parsing
    process_button = gr.Button("Process Resumes")

    def process_resumes(hf_token, pdf_files):
        # Attempt to log in with the provided token before doing any work.
        login_message = login_with_token(hf_token)
        if "Error" in login_message:
            return login_message, None
        # Process resumes and generate the download link.
        excel_file_path = batch_process_resumes(pdf_files)
        if excel_file_path:
            return login_message + "\nExcel file with parsed resumes is ready for download.", excel_file_path
        return login_message + "\nNo valid resume information was parsed.", None

    # Set up the button click event
    process_button.click(
        process_resumes,
        inputs=[hf_token_input, file_input],
        outputs=[output, download_link],
    )

# Launch the Gradio interface
demo.launch()