import gradio as gr
import pdfplumber
import torch
from transformers import pipeline
import pandas as pd
from huggingface_hub import login

# Function to log in using a Hugging Face API token
def login_with_token(hf_token):
    """Log in to Hugging Face using the provided token."""
    try:
        login(token=hf_token)
        return "Logged in successfully!"
    except Exception as e:
        return f"Error: {str(e)}"

# Load the model for Named Entity Recognition (NER).
# With the default (no aggregation strategy), the pipeline emits one entry per
# word piece, so multi-token names arrive as separate "B-PER"/"I-PER" items.
nlp = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english", framework="pt")

def extract_text_from_pdf(pdf_file):
    """Extracts text from a PDF file using pdfplumber."""
    try:
        with pdfplumber.open(pdf_file.name) as pdf:
            text = ''
            for page in pdf.pages:
                text += page.extract_text() or ""  # Handle pages with no text
        return text
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return ""

def parse_resume(pdf_file):
    """Parses the resume and extracts relevant information."""
    # Extract text from PDF
    resume_text = extract_text_from_pdf(pdf_file)
    if not resume_text.strip():
        print("No text found in PDF.")
        return {}
    # Use the NER model to identify entities in the resume
    entities = nlp(resume_text)
    # Initialize empty fields
    name = email = phone = education = skills = experience = None
    # Example parsing logic based on NER output
    for entity in entities:
        label = entity.get("entity", "")
        word = entity.get("word", "").strip()
        if label in ("B-PER", "I-PER"):
            name = (name or "") + word + " "
        elif label in ("B-ORG", "I-ORG"):
            experience = (experience or "") + word + " "
        elif "@" in word:  # Simple email detection
            email = word
        elif label == "I-MISC":
            skills = (skills or "") + word + ", "
    # Clean up trailing spaces and commas
    name = name.strip() if name else None
    skills = skills.rstrip(", ") if skills else None
    # Log the final parsed information
    print(f"Parsed Info: Name={name}, Email={email}, Skills={skills}, Experience={experience}")
    return {
        'Name': name,
        'Email': email,
        'Phone': phone,
        'Education': education,
        'Skills': skills,
        'Experience': experience,
    }
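
# Optional sketch (not wired into the app): the NER loop above only inspects
# tagged tokens, so emails are usually missed and 'Phone' is never filled in.
# A plain regex pass over the raw resume text can recover both.
import re

def extract_contacts(resume_text):
    """Illustrative helper: pull an email address and a phone number from raw text."""
    email_match = re.search(r"[\w.+-]+@[\w-]+\.[\w.-]+", resume_text)
    phone_match = re.search(r"\+?\d[\d\s().-]{7,}\d", resume_text)
    return (
        email_match.group(0) if email_match else None,
        phone_match.group(0) if phone_match else None,
    )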

def batch_process_resumes(pdf_files):
    """Processes a batch of resume PDFs and outputs an Excel file."""
    all_resumes = []
    for pdf_file in pdf_files or []:  # Guard against no files being uploaded
        resume_info = parse_resume(pdf_file)
        # Only add the parsed resume info if there's meaningful data
        if any(resume_info.values()):
            all_resumes.append(resume_info)
    # If no resumes are successfully parsed, return None
    if not all_resumes:
        print("No valid resume information was parsed.")
        return None
    # Convert to DataFrame
    df = pd.DataFrame(all_resumes)
    # Define the file path for the Excel file
    output_file = "/tmp/parsed_resumes.xlsx"
    # Save to Excel (requires the openpyxl package)
    df.to_excel(output_file, index=False)
    # Return the path to the file for download
    return output_file
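
# Example usage outside Gradio (hypothetical paths, wrapped so each object
# exposes `.name` the way a Gradio upload does):
#
#     from types import SimpleNamespace
#     files = [SimpleNamespace(name="resume1.pdf"), SimpleNamespace(name="resume2.pdf")]
#     print(batch_process_resumes(files))  # -> "/tmp/parsed_resumes.xlsx" or None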

# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("### AI Resume Parser")
    # User input for Hugging Face token (masked so it isn't shown in plain text)
    hf_token_input = gr.Textbox(label="Hugging Face Token", placeholder="Enter your Hugging Face API Token here", type="password")
    # File input for resume files
    file_input = gr.File(file_count="multiple", label="Upload Resumes (PDFs)")
    # Output for results
    output = gr.Textbox(label="Result")
    # File output for the download link
    download_link = gr.File(label="Download Excel File", file_count="single")
    # Process button that triggers the login and resume parsing
    process_button = gr.Button("Process Resumes")

    # Function called when the button is clicked
    def process_resumes(hf_token, pdf_files):
        # Attempt to log in with the provided token
        login_message = login_with_token(hf_token)
        if "Error" not in login_message:
            # Process resumes and generate the download link
            excel_file_path = batch_process_resumes(pdf_files)
            if excel_file_path:
                return login_message + "\nExcel file with parsed resumes is ready for download.", excel_file_path
            else:
                return login_message + "\nNo valid resume information was parsed.", None
        else:
            return login_message, None

    # Set up the button click event
    process_button.click(process_resumes, inputs=[hf_token_input, file_input], outputs=[output, download_link])

# Launch the Gradio interface
demo.launch()