# Author: Manojajj
# Commit message: Update app.py
# Commit: 18b1dee (verified)
import os
import re
import tempfile

import gradio as gr
import pandas as pd
import pdfplumber
import torch
from huggingface_hub import login
from transformers import pipeline
# Function to login using Hugging Face API token
def login_with_token(hf_token):
    """Log in to the Hugging Face Hub with the supplied access token.

    Args:
        hf_token: Access token string as entered by the user. Surrounding
            whitespace (e.g. from copy/paste) is ignored.

    Returns:
        "Logged in successfully!" on success, otherwise a message starting
        with "Error: " that describes what went wrong.
    """
    # Reject a missing or blank token up front: calling ``login`` without a
    # token makes the Hub client fall back to an interactive prompt, which
    # must never happen inside a web request.
    token = hf_token.strip() if isinstance(hf_token, str) else ""
    if not token:
        return "Error: No Hugging Face token provided."

    try:
        login(token=token)
    except Exception as e:  # surface any Hub/network failure to the caller
        return f"Error: {str(e)}"
    return "Logged in successfully!"
# Named Entity Recognition (NER) pipeline, built once when the module is
# imported and reused for every call to ``nlp(...)``.
nlp = pipeline(
    "ner",
    model="dbmdz/bert-large-cased-finetuned-conll03-english",
    framework="pt",
)
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF using pdfplumber.

    Args:
        pdf_file: Either a filesystem path (``str`` / ``os.PathLike``) or an
            uploaded-file object that exposes its path as ``.name``.

    Returns:
        The text of all non-empty pages joined with newlines, or "" when the
        file cannot be read or contains no extractable text.
    """
    try:
        # Accept a plain path as well as an object carrying the path in
        # ``.name``. The attribute access stays inside the ``try`` so that an
        # unusable argument is reported like any other read failure.
        if isinstance(pdf_file, (str, os.PathLike)):
            pdf_path = pdf_file
        else:
            pdf_path = pdf_file.name

        with pdfplumber.open(pdf_path) as pdf:
            # ``extract_text()`` can yield nothing for a page without text.
            page_texts = [page.extract_text() or "" for page in pdf.pages]
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return ""

    # Join with a newline so the last word of one page is not fused with the
    # first word of the next page.
    return "\n".join(text for text in page_texts if text)
# Matches a typical e-mail address anywhere in the resume text.
_EMAIL_PATTERN = re.compile(r"[\w.+-]+@[\w-]+(?:\.[\w-]+)+")

# Maps the entity kind of an NER label ("B-PER" -> "PER") to the resume field
# that collects its tokens.
_LABEL_FIELDS = {"PER": "name", "ORG": "experience", "MISC": "skills"}


def _merge_word_pieces(tokens):
    """Merge sub-word continuations ("##xyz") into the preceding word."""
    words = []
    for token in tokens:
        if token.startswith("##"):
            piece = token[2:]
            if words:
                words[-1] += piece
            elif piece:
                # Continuation without a visible head: keep it as a word.
                words.append(piece)
        elif token:
            words.append(token)
    return words


def _find_email(text):
    """Return the first e-mail address found in *text*, or None."""
    match = _EMAIL_PATTERN.search(text)
    return match.group(0) if match else None


def parse_resume(pdf_file):
    """Parse one resume PDF and extract the fields that can be recognised.

    The PDF text is run through the module-level ``nlp`` NER pipeline:
    person tokens form the name, organisation tokens form the experience
    string and miscellaneous tokens form the comma-separated skills. The
    e-mail address is searched for in the raw text with a regular expression.

    Args:
        pdf_file: The uploaded PDF, passed on to ``extract_text_from_pdf``.

    Returns:
        ``{}`` when the PDF yields no text; otherwise a dict with the keys
        'Name', 'Email', 'Phone', 'Education', 'Skills' and 'Experience'.
        Fields that were not found are None ('Phone' and 'Education' are
        not extracted and are always None).
    """
    resume_text = extract_text_from_pdf(pdf_file)
    if not resume_text.strip():
        print("No text found in PDF.")
        return {}

    # NOTE(review): very long resumes may exceed the model's maximum input
    # length -- confirm whether the pipeline truncates or raises.
    entities = nlp(resume_text)

    tokens_by_field = {"name": [], "experience": [], "skills": []}
    for entity in entities:
        label = entity.get("entity", "")
        word = entity.get("word", "").strip()
        # Treat B-/I- variants of a label alike: "B-PER"/"I-PER" -> "PER".
        prefix, _, kind = label.partition("-")
        field = _LABEL_FIELDS.get(kind) if prefix in ("B", "I") else None
        if field is not None and word:
            tokens_by_field[field].append(word)

    # Glue sub-word pieces back together before joining; empty -> None.
    name = " ".join(_merge_word_pieces(tokens_by_field["name"])) or None
    experience = (
        " ".join(_merge_word_pieces(tokens_by_field["experience"])) or None
    )
    skills = ", ".join(_merge_word_pieces(tokens_by_field["skills"])) or None

    # E-mail addresses are not NER entities (and are split into several
    # tokens), so look for them in the raw text instead.
    email = _find_email(resume_text)

    # Not extracted yet; kept so the output columns stay stable.
    phone = education = None

    # Log the final parsed information
    print(f"Parsed Info: Name={name}, Email={email}, Skills={skills}, Experience={experience}")

    return {
        'Name': name,
        'Email': email,
        'Phone': phone,
        'Education': education,
        'Skills': skills,
        'Experience': experience,
    }
def batch_process_resumes(pdf_files):
    """Parse a batch of resume PDFs and write the results to an Excel file.

    Args:
        pdf_files: Iterable of uploaded PDF files. ``None`` or an empty
            sequence (nothing uploaded) is treated as an empty batch.

    Returns:
        Path of the generated ``.xlsx`` file, or None when no resume
        produced any information.
    """
    parsed = (parse_resume(pdf_file) for pdf_file in pdf_files or [])
    # Keep only resumes for which at least one field was extracted.
    all_resumes = [info for info in parsed if any(info.values())]

    if not all_resumes:
        print("No valid resume information was parsed.")
        return None

    df = pd.DataFrame(all_resumes)

    # Write to a unique temporary file so that concurrent requests do not
    # overwrite each other's results.
    fd, output_file = tempfile.mkstemp(prefix="parsed_resumes_", suffix=".xlsx")
    os.close(fd)  # pandas reopens the path itself
    df.to_excel(output_file, index=False)

    # Return the path to the file for download
    return output_file
# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("### AI Resume Parser")

    # The token is a secret, so mask it instead of echoing it in clear text.
    hf_token_input = gr.Textbox(
        label="Hugging Face Token",
        placeholder="Enter your Hugging Face API Token here",
        type="password",
    )

    # File input for resume files
    file_input = gr.File(file_count="multiple", label="Upload Resumes (PDFs)")

    # Output for results
    output = gr.Textbox(label="Result")

    # File output for the download link
    download_link = gr.File(label="Download Excel File", file_count="single")

    # Process button that triggers the login and resume parsing
    process_button = gr.Button("Process Resumes")

    def process_resumes(hf_token, pdf_files):
        """Log in, parse the uploaded resumes and report the outcome.

        Args:
            hf_token: Token text from the token textbox.
            pdf_files: Files from the upload component; may be empty or
                None when nothing was uploaded.

        Returns:
            A ``(status message, Excel file path or None)`` tuple matching
            the ``output`` and ``download_link`` components.
        """
        login_message = login_with_token(hf_token)
        if "Error" in login_message:
            return login_message, None

        # Skip the batch step entirely when nothing was uploaded.
        excel_file_path = batch_process_resumes(pdf_files) if pdf_files else None
        if excel_file_path:
            return login_message + "\nExcel file with parsed resumes is ready for download.", excel_file_path
        return login_message + "\nNo valid resume information was parsed.", None

    # Set up the button click event
    process_button.click(process_resumes, inputs=[hf_token_input, file_input], outputs=[output, download_link])

# Launch the Gradio interface
demo.launch()