import gradio as gr
from transformers import pipeline
import PyPDF2
# Build the token-classification pipeline once at import time so that every
# request reuses the same loaded model.
model_name = "jjzha/jobbert_knowledge_extraction"
pipe = pipeline(
    task="token-classification",
    model=model_name,
)
# Function to extract and highlight key skills/words from the job posting
def extract_keywords_with_highlights(job_posting_text):
    """Highlight the skills the model detects in a job posting.

    Runs the module-level token-classification pipeline ``pipe`` over the
    text, merges detected tokens that touch each other into a single span
    (so a word split into ``##`` word pieces is treated as one keyword), and
    wraps every span in a ``<mark>`` tag.

    Args:
        job_posting_text: Raw job posting text. Empty or ``None`` yields an
            empty result without calling the model.

    Returns:
        A ``(html, keywords)`` tuple. ``html`` is the posting as an HTML
        string with the detected spans highlighted and newlines replaced by
        ``<br>``; ``keywords`` is the set of lower-cased highlighted spans.
    """
    import html  # local import: only this function needs it

    if not job_posting_text:
        return "", set()

    results = pipe(job_posting_text)

    # Fix the `##` issue by reconstructing full words: instead of stripping
    # the `##` marker from each word piece, merge pieces whose character
    # offsets touch and take the text straight from the posting.
    spans = []
    for result in results:
        start, end = result["start"], result["end"]
        if spans and start <= spans[-1][1]:
            spans[-1][1] = max(spans[-1][1], end)
        else:
            spans.append([start, end])

    parts = []
    highlighted_words = set()
    previous_end = 0
    for start, end in spans:
        # Add the (escaped) text between the previous span and this one.
        parts.append(html.escape(job_posting_text[previous_end:start]))
        # Highlight the span using the posting's own spelling and casing.
        keyword = job_posting_text[start:end]
        parts.append(f"<mark>{html.escape(keyword)}</mark>")
        if keyword.strip():
            # Lower-cased so it can be compared with lower-cased resume text.
            highlighted_words.add(keyword.strip().lower())
        previous_end = end
    # Add the remaining text after the last span.
    parts.append(html.escape(job_posting_text[previous_end:]))

    # Replace newline characters with <br> to preserve line breaks in HTML.
    reconstructed_text = "".join(parts).replace("\n", "<br>")
    return reconstructed_text, highlighted_words
# Function to check if highlighted words are in the resume
def check_keywords_in_resume(resume_file_path, job_posting_text):
    """Report which job-posting keywords appear in a PDF resume.

    Args:
        resume_file_path: Filesystem path of the uploaded PDF resume.
        job_posting_text: Raw text of the job posting.

    Returns:
        A ``(highlighted_html, matched_summary, missing_summary)`` tuple:
        the highlighted posting HTML produced by
        ``extract_keywords_with_highlights`` and two one-line summaries
        listing the keywords found in, and absent from, the resume.
    """
    import string  # local import: only this function needs it

    # Extract text from the uploaded PDF resume.
    with open(resume_file_path, "rb") as file:
        pdf_reader = PyPDF2.PdfReader(file)
        # `or ""` is a defensive guard so a page that yields no text cannot
        # break the join.
        resume_text = " ".join(
            page.extract_text() or "" for page in pdf_reader.pages
        )

    # Extract highlighted keywords from the job posting.
    highlighted_html, highlighted_words = extract_keywords_with_highlights(
        job_posting_text
    )

    # Index every resume token twice: as written, and with surrounding
    # punctuation removed, so "python," matches "python" while a keyword
    # such as "c++" can still match its exact spelling.
    resume_words = set()
    for token in resume_text.lower().split():
        resume_words.add(token)
        resume_words.add(token.strip(string.punctuation))

    matched_words = highlighted_words.intersection(resume_words)
    missing_words = highlighted_words - matched_words

    # Prepare a summary; sorted so the output order is stable between runs.
    matched_summary = f"Matched Keywords: {', '.join(sorted(matched_words))}"
    missing_summary = f"Missing Keywords: {', '.join(sorted(missing_words))}"
    return highlighted_html, matched_summary, missing_summary
# Set up Gradio interface: the two inputs (resume PDF path, posting text) are
# passed to check_keywords_in_resume, and its three return values fill the
# three outputs in order.
interface = gr.Interface(
    fn=check_keywords_in_resume,
    inputs=[
        gr.File(label="Upload Resume PDF", type="filepath"),
        gr.Textbox(label="Enter Job Posting Text", lines=30, placeholder="Paste job posting text here..."),
    ],
    outputs=[
        gr.HTML(label="Highlighted Key Skills/Words in Job Posting"),
        gr.Textbox(label="Matched Keywords"),
        gr.Textbox(label="Missing Keywords"),
    ],
    title="Resume vs Job Posting Skill Match with Highlights",
    description="Upload your resume and enter a job posting. The app will highlight key skills from the job posting and check if they are present in your resume.",
)
# Launch the Gradio app