# final-project / app.py
# Last commit: "update app.py" (09b06d4) by kylezhao101
import html

import gradio as gr
import PyPDF2
from transformers import pipeline
# Load the token classification pipeline.
# This runs once at module import, so every call to
# extract_keywords_with_highlights() reuses the same `pipe` object instead of
# reloading the model per request.
# NOTE(review): the model name suggests it tags knowledge/skill tokens in job
# postings -- confirm the label set against the model card.
model_name = "jjzha/jobbert_knowledge_extraction"
pipe = pipeline("token-classification", model=model_name)
# Function to extract and highlight key skills/words from the job posting
def _merge_adjacent_tokens(results):
    """Merge character-contiguous token predictions into whole-word spans.

    The pipeline reports one entry per tokenizer piece, so a single word can
    arrive as several entries (e.g. ``py`` + ``##thon``). Pieces whose
    character ranges touch or overlap are folded into one span so the word is
    highlighted, and later matched, as a unit.

    Args:
        results: Iterable of dicts with ``start``, ``end``, ``entity`` and
            ``score`` keys, as read from the token-classification pipeline.

    Returns:
        A list of dicts with ``start``, ``end``, ``entity`` (label of the
        first piece) and ``scores`` (one score per merged piece), ordered by
        position in the text.
    """
    # Entries without character offsets cannot be located in the source text.
    located = [
        r for r in results if r["start"] is not None and r["end"] is not None
    ]

    spans = []
    for result in sorted(located, key=lambda r: r["start"]):
        start, end = result["start"], result["end"]
        if spans and start <= spans[-1]["end"]:
            # No gap since the previous piece: same word, extend it.
            current = spans[-1]
            current["end"] = max(current["end"], end)
            current["scores"].append(result["score"])
        else:
            spans.append(
                {
                    "start": start,
                    "end": end,
                    "entity": result["entity"],
                    "scores": [result["score"]],
                }
            )
    return spans


def extract_keywords_with_highlights(job_posting_text):
    """Tag key skills/words in a job posting and render them as highlighted HTML.

    Args:
        job_posting_text: Raw job posting text. ``None``, empty, or
            whitespace-only input yields an empty result without running
            the model.

    Returns:
        A ``(html, keywords)`` tuple. ``html`` is the posting wrapped in a
        ``<div>``, with every detected word inside a yellow ``<span>`` whose
        tooltip shows the entity label and score, and newlines turned into
        ``<br>``. ``keywords`` is the set of detected words, lower-cased.
    """
    if not job_posting_text or not job_posting_text.strip():
        return (
            '<div style="font-family:Arial, sans-serif; line-height:1.5;"></div>',
            set(),
        )

    spans = _merge_adjacent_tokens(pipe(job_posting_text))

    pieces = []
    highlighted_words = set()
    cursor = 0
    for span in spans:
        # Slice the word out of the original text rather than using the
        # tokenizer's string, so casing and characters are preserved and no
        # `##` sub-word markers leak into the output.
        word = job_posting_text[span["start"]:span["end"]]
        if not word.strip():
            continue
        highlighted_words.add(word.lower())

        # The posting is untrusted user input rendered through gr.HTML, so
        # every fragment taken from it is escaped before being embedded.
        pieces.append(html.escape(job_posting_text[cursor:span["start"]]))

        # A merged span reports the average score of its pieces.
        mean_score = sum(span["scores"]) / len(span["scores"])
        pieces.append(
            '<span style="background-color:yellow; font-weight:bold;" '
            f'title="Entity: {html.escape(str(span["entity"]))} (Score: {mean_score:.2f})">'
            f"{html.escape(word)}</span>"
        )
        cursor = span["end"]

    # Add the remaining text after the last highlighted word.
    pieces.append(html.escape(job_posting_text[cursor:]))

    # Replace newline characters with <br> to preserve line breaks.
    reconstructed_text = "".join(pieces).replace("\n", "<br>")
    return (
        f'<div style="font-family:Arial, sans-serif; line-height:1.5;">{reconstructed_text}</div>',
        highlighted_words,
    )
# Function to check if highlighted words are in the resume
def check_keywords_in_resume(resume_file_path, job_posting_text):
    """Report which job-posting keywords appear in an uploaded PDF resume.

    Args:
        resume_file_path: Filesystem path of the uploaded resume PDF.
        job_posting_text: Raw text of the job posting.

    Returns:
        A ``(highlighted_html, matched_summary, missing_summary)`` tuple:
        the highlighted job posting HTML, then two one-line summaries
        listing the keywords found in and absent from the resume.

    Raises:
        gr.Error: If no resume file was provided.
    """
    # Gradio passes None when the file input is left empty; surface a clear
    # message instead of the TypeError that open(None) would raise.
    if not resume_file_path:
        raise gr.Error("Please upload a resume PDF.")

    # Extract text from the uploaded PDF resume. The reader pulls data from
    # the file object on demand, so extraction stays inside the `with` block.
    with open(resume_file_path, "rb") as file:
        pdf_reader = PyPDF2.PdfReader(file)
        # `or ""` keeps the join safe for pages that yield no text.
        resume_text = " ".join(
            page.extract_text() or "" for page in pdf_reader.pages
        )

    # Extract highlighted keywords from the job posting
    highlighted_html, highlighted_words = extract_keywords_with_highlights(job_posting_text)

    # Index the resume by word. Each word is stored both as written and with
    # surrounding sentence punctuation stripped, so "python," or "(sql)"
    # still match, while inner symbols ("c++", "node.js", "c#") are kept.
    edge_punctuation = ".,;:!?()[]{}<>\"'"
    resume_words = set()
    for token in resume_text.lower().split():
        resume_words.add(token)
        resume_words.add(token.strip(edge_punctuation))
    resume_words.discard("")

    matched_words = highlighted_words.intersection(resume_words)
    missing_words = highlighted_words - matched_words

    # Prepare a summary; sorting keeps the output stable between runs.
    matched_summary = f"Matched Keywords: {', '.join(sorted(matched_words))}"
    missing_summary = f"Missing Keywords: {', '.join(sorted(missing_words))}"
    return highlighted_html, matched_summary, missing_summary
# Set up Gradio interface.
# Components are created up front, in the order they are shown, and then
# handed to gr.Interface: two inputs (resume PDF, job posting text) feeding
# check_keywords_in_resume, and three outputs matching its return tuple.
resume_upload = gr.File(label="Upload Resume PDF", type="filepath")
job_posting_box = gr.Textbox(
    label="Enter Job Posting Text",
    lines=30,
    placeholder="Paste job posting text here...",
)

highlighted_view = gr.HTML(label="Highlighted Key Skills/Words in Job Posting")
matched_box = gr.Textbox(label="Matched Keywords")
missing_box = gr.Textbox(label="Missing Keywords")

interface = gr.Interface(
    title="Resume vs Job Posting Skill Match with Highlights",
    description="Upload your resume and enter a job posting. The app will highlight key skills from the job posting and check if they are present in your resume.",
    fn=check_keywords_in_resume,
    inputs=[resume_upload, job_posting_box],
    outputs=[highlighted_view, matched_box, missing_box],
)

# Launch the Gradio app
interface.launch()