|
import os
import re
import tempfile

import gradio as gr
import pandas as pd
import pdfplumber
import torch
from huggingface_hub import login
from transformers import pipeline
|
|
|
|
|
def login_with_token(hf_token):
    """Authenticate against the Hugging Face Hub with an API token.

    Args:
        hf_token: The user's Hugging Face access token.

    Returns:
        str: A success message, or an "Error: ..." description when
        authentication fails for any reason.
    """
    try:
        login(token=hf_token)
    except Exception as e:
        # Surface the failure as text so the UI can display it.
        return f"Error: {str(e)}"
    return "Logged in successfully!"
|
|
|
|
|
# Pretrained English NER pipeline (CoNLL-03 labels: PER/ORG/LOC/MISC) on the
# PyTorch backend, loaded once at import time and shared by all requests.
nlp = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english", framework="pt")
|
|
|
def extract_text_from_pdf(pdf_file):
    """Extracts text from a PDF file using pdfplumber.

    Args:
        pdf_file: Either a filesystem path (str) or an object exposing a
            ``.name`` attribute pointing at the PDF (e.g. a Gradio upload).

    Returns:
        str: The concatenated text of every page, or "" when the PDF is
        unreadable or contains no extractable text.
    """
    # Gradio may hand us a plain path string or a tempfile-style wrapper;
    # the original `pdf_file.name` crashed (and silently returned "") on str paths.
    path = pdf_file if isinstance(pdf_file, str) else getattr(pdf_file, "name", pdf_file)
    try:
        with pdfplumber.open(path) as pdf:
            # extract_text() returns None for image-only pages; join at C speed
            # instead of quadratic `+=` concatenation.
            return "".join(page.extract_text() or "" for page in pdf.pages)
    except Exception as e:
        # Best-effort: a broken PDF yields an empty string, not a crash.
        print(f"Error reading PDF: {e}")
        return ""
|
|
|
def _accumulate(acc, word, sep):
    """Append NER token *word* to accumulator *acc* with separator *sep*.

    BERT wordpiece continuations arrive as "##xyz"; glue them onto the
    previous token instead of starting a new, separated one.
    """
    if word.startswith("##"):
        return (acc or "").rstrip(sep) + word[2:] + sep
    return (acc or "") + word + sep


def parse_resume(pdf_file):
    """Parses the resume and extracts relevant information.

    NER supplies name (PER), experience (ORG) and skills (MISC); email and
    phone are pulled from the raw text with regexes, because the BERT
    tokenizer splits "@" and digit runs into separate wordpieces, so they
    never appear as single entity tokens.

    Args:
        pdf_file: Path or file-like object for the resume PDF.

    Returns:
        dict: Keys 'Name', 'Email', 'Phone', 'Education', 'Skills',
        'Experience' mapping to str or None; {} when no text is found.
    """
    resume_text = extract_text_from_pdf(pdf_file)

    if not resume_text.strip():
        print("No text found in PDF.")
        return {}

    # Contact details come straight from the text, not from NER tokens.
    email_match = re.search(r"[\w.+-]+@[\w-]+(?:\.[\w-]+)+", resume_text)
    email = email_match.group(0) if email_match else None
    phone_match = re.search(r"\+?\d[\d\s().-]{7,}\d", resume_text)
    phone = phone_match.group(0) if phone_match else None

    # NOTE(review): the model caps input at 512 tokens; very long resumes
    # may raise inside the pipeline -- confirm truncation policy upstream.
    entities = nlp(resume_text)

    name = education = skills = experience = None

    for entity in entities:
        label = entity.get("entity", "")
        word = entity.get("word", "").strip()

        if label in ("B-PER", "I-PER"):
            name = _accumulate(name, word, " ")
        elif label in ("B-ORG", "I-ORG"):
            experience = _accumulate(experience, word, " ")
        elif label == "I-MISC":
            skills = _accumulate(skills, word, ", ")

    # Normalize trailing separators left by the accumulation loop.
    name = name.strip() if name else None
    skills = skills.rstrip(", ") if skills else None
    experience = experience.strip() if experience else None

    print(f"Parsed Info: Name={name}, Email={email}, Skills={skills}, Experience={experience}")

    return {
        'Name': name,
        'Email': email,
        'Phone': phone,
        'Education': education,
        'Skills': skills,
        'Experience': experience,
    }
|
|
|
def batch_process_resumes(pdf_files):
    """Processes a batch of resume PDFs and outputs an Excel file.

    Args:
        pdf_files: Iterable of uploaded PDF files (paths or file objects);
            may be None when the user submitted without uploading.

    Returns:
        str | None: Path to the generated .xlsx file, or None when no
        files were given or nothing useful could be parsed.
    """
    # Gradio passes None when no files are uploaded; iterating it would
    # raise TypeError.
    if not pdf_files:
        print("No valid resume information was parsed.")
        return None

    all_resumes = []
    for pdf_file in pdf_files:
        resume_info = parse_resume(pdf_file)
        # Keep only resumes where at least one field was extracted.
        if any(resume_info.values()):
            all_resumes.append(resume_info)

    if not all_resumes:
        print("No valid resume information was parsed.")
        return None

    df = pd.DataFrame(all_resumes)

    # tempfile.gettempdir() is portable; a hard-coded "/tmp" breaks on Windows.
    output_file = os.path.join(tempfile.gettempdir(), "parsed_resumes.xlsx")

    df.to_excel(output_file, index=False)

    return output_file
|
|
|
|
|
# --- Gradio UI: token entry, multi-PDF upload, status text, Excel download ---
with gr.Blocks() as demo:
    gr.Markdown("### AI Resume Parser")

    hf_token_input = gr.Textbox(label="Hugging Face Token", placeholder="Enter your Hugging Face API Token here")
    file_input = gr.File(file_count="multiple", label="Upload Resumes (PDFs)")
    output = gr.Textbox(label="Result")
    download_link = gr.File(label="Download Excel File", file_count="single")
    process_button = gr.Button("Process Resumes")

    def process_resumes(hf_token, pdf_files):
        """Log in with the supplied token, then parse the uploaded PDFs.

        Returns a (status message, excel path or None) pair for the UI.
        """
        status = login_with_token(hf_token)
        if "Error" in status:
            # Authentication failed: report it and offer no download.
            return status, None
        excel_path = batch_process_resumes(pdf_files)
        if excel_path is None:
            return status + "\nNo valid resume information was parsed.", None
        return status + "\nExcel file with parsed resumes is ready for download.", excel_path

    process_button.click(process_resumes, inputs=[hf_token_input, file_input], outputs=[output, download_link])

demo.launch()
|
|