|
import os
import re
import tempfile

import gradio as gr
import pandas as pd
import pdfplumber
import torch
from huggingface_hub import login
from transformers import pipeline
|
|
|
|
|
def login_with_token(hf_token):
    """Authenticate against the Hugging Face Hub with an API token.

    Args:
        hf_token: The user's Hugging Face access token.

    Returns:
        str: A success message, or an "Error: ..." description when
        authentication fails for any reason.
    """
    try:
        login(token=hf_token)
    except Exception as e:
        # Surface the failure as text so the UI can display it.
        return f"Error: {str(e)}"
    return "Logged in successfully!"
|
|
|
|
|
# Pretrained English NER pipeline (CoNLL-03 labels: PER/ORG/LOC/MISC) on the
# PyTorch backend, loaded once at import time and shared by all requests.
nlp = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english", framework="pt")
|
|
|
def extract_text_from_pdf(pdf_file):
    """Extracts text from a PDF file using pdfplumber.

    Args:
        pdf_file: Either a filesystem path (str) or an object exposing a
            ``.name`` attribute pointing at the PDF (e.g. a Gradio upload).

    Returns:
        str: The concatenated text of every page, or "" when the PDF is
        unreadable or contains no extractable text.
    """
    # Gradio may hand us a plain path string or a tempfile-style wrapper;
    # the original `pdf_file.name` crashed (and silently returned "") on str paths.
    path = pdf_file if isinstance(pdf_file, str) else getattr(pdf_file, "name", pdf_file)
    try:
        with pdfplumber.open(path) as pdf:
            # extract_text() returns None for image-only pages; join at C speed
            # instead of quadratic `+=` concatenation.
            return "".join(page.extract_text() or "" for page in pdf.pages)
    except Exception as e:
        # Best-effort: a broken PDF yields an empty string, not a crash.
        print(f"Error reading PDF: {e}")
        return ""
|
|
|
def _accumulate(acc, word, sep):
    """Append NER token *word* to accumulator *acc* with separator *sep*.

    BERT wordpiece continuations arrive as "##xyz"; glue them onto the
    previous token instead of starting a new, separated one.
    """
    if word.startswith("##"):
        return (acc or "").rstrip(sep) + word[2:] + sep
    return (acc or "") + word + sep


def parse_resume(pdf_file):
    """Parses the resume and extracts relevant information.

    NER supplies name (PER), experience (ORG) and skills (MISC); email and
    phone are pulled from the raw text with regexes, because the BERT
    tokenizer splits "@" and digit runs into separate wordpieces, so they
    never appear as single entity tokens.

    Args:
        pdf_file: Path or file-like object for the resume PDF.

    Returns:
        dict: Keys 'Name', 'Email', 'Phone', 'Education', 'Skills',
        'Experience' mapping to str or None; {} when no text is found.
    """
    resume_text = extract_text_from_pdf(pdf_file)

    if not resume_text.strip():
        print("No text found in PDF.")
        return {}

    # Contact details come straight from the text, not from NER tokens.
    email_match = re.search(r"[\w.+-]+@[\w-]+(?:\.[\w-]+)+", resume_text)
    email = email_match.group(0) if email_match else None
    phone_match = re.search(r"\+?\d[\d\s().-]{7,}\d", resume_text)
    phone = phone_match.group(0) if phone_match else None

    # NOTE(review): the model caps input at 512 tokens; very long resumes
    # may raise inside the pipeline -- confirm truncation policy upstream.
    entities = nlp(resume_text)

    name = education = skills = experience = None

    for entity in entities:
        label = entity.get("entity", "")
        word = entity.get("word", "").strip()

        if label in ("B-PER", "I-PER"):
            name = _accumulate(name, word, " ")
        elif label in ("B-ORG", "I-ORG"):
            experience = _accumulate(experience, word, " ")
        elif label == "I-MISC":
            skills = _accumulate(skills, word, ", ")

    # Normalize trailing separators left by the accumulation loop.
    name = name.strip() if name else None
    skills = skills.rstrip(", ") if skills else None
    experience = experience.strip() if experience else None

    print(f"Parsed Info: Name={name}, Email={email}, Skills={skills}, Experience={experience}")

    return {
        'Name': name,
        'Email': email,
        'Phone': phone,
        'Education': education,
        'Skills': skills,
        'Experience': experience,
    }
|
|
|
def batch_process_resumes(pdf_files):
    """Processes a batch of resume PDFs and outputs an Excel file.

    Args:
        pdf_files: Iterable of uploaded PDF files (paths or file objects);
            may be None when the user submitted without uploading.

    Returns:
        str | None: Path to the generated .xlsx file, or None when no
        files were given or nothing useful could be parsed.
    """
    # Gradio passes None when no files are uploaded; iterating it would
    # raise TypeError.
    if not pdf_files:
        print("No valid resume information was parsed.")
        return None

    all_resumes = []
    for pdf_file in pdf_files:
        resume_info = parse_resume(pdf_file)
        # Keep only resumes where at least one field was extracted.
        if any(resume_info.values()):
            all_resumes.append(resume_info)

    if not all_resumes:
        print("No valid resume information was parsed.")
        return None

    df = pd.DataFrame(all_resumes)

    # tempfile.gettempdir() is portable; a hard-coded "/tmp" breaks on Windows.
    output_file = os.path.join(tempfile.gettempdir(), "parsed_resumes.xlsx")

    df.to_excel(output_file, index=False)

    return output_file
|
|
|
|
|
# --- Gradio UI: token entry, multi-PDF upload, status text, Excel download ---
with gr.Blocks() as demo:
    gr.Markdown("### AI Resume Parser")

    hf_token_input = gr.Textbox(label="Hugging Face Token", placeholder="Enter your Hugging Face API Token here")
    file_input = gr.File(file_count="multiple", label="Upload Resumes (PDFs)")
    output = gr.Textbox(label="Result")
    download_link = gr.File(label="Download Excel File", file_count="single")
    process_button = gr.Button("Process Resumes")

    def process_resumes(hf_token, pdf_files):
        """Log in with the supplied token, then parse the uploaded PDFs.

        Returns a (status message, excel path or None) pair for the UI.
        """
        status = login_with_token(hf_token)
        if "Error" in status:
            # Authentication failed: report it and offer no download.
            return status, None
        excel_path = batch_process_resumes(pdf_files)
        if excel_path is None:
            return status + "\nNo valid resume information was parsed.", None
        return status + "\nExcel file with parsed resumes is ready for download.", excel_path

    process_button.click(process_resumes, inputs=[hf_token_input, file_input], outputs=[output, download_link])

demo.launch()
|
|