Spaces:
Sleeping
Sleeping
import gradio as gr | |
from transformers import pipeline | |
import PyPDF2 | |
# Build the token-classification pipeline once at import time; the checkpoint
# name is kept in `model_name` so it can be inspected or swapped in one place.
model_name = "jjzha/jobbert_knowledge_extraction"
pipe = pipeline(task="token-classification", model=model_name)
def extract_keywords_with_highlights(job_posting_text, classifier=None):
    """Highlight the tokens a token-classification model tags in a job posting.

    Args:
        job_posting_text: Raw job posting text. ``None`` or an empty string
            yields an empty highlighted block and an empty keyword set.
        classifier: Optional callable taking the text and returning an
            iterable of dicts with ``start``, ``end``, ``word``, ``entity``
            and ``score`` keys. Defaults to the module-level ``pipe``.

    Returns:
        A ``(html_markup, highlighted_words)`` tuple. ``html_markup`` is the
        posting wrapped in a ``<div>``, with each tagged span wrapped in a
        highlighted ``<span>`` and newlines turned into ``<br>``.
        ``highlighted_words`` is the set of lower-cased highlighted spans.
    """
    import html  # function-scope import: only this block needs it

    text = job_posting_text or ""
    if classifier is None:
        classifier = pipe

    # Merge subword continuation pieces (prefixed with "##") into the piece
    # they directly follow, so "Py" + "##thon" becomes one "Python" span
    # instead of two fragments that can never match a resume word.
    spans = []
    if text.strip():
        for token in classifier(text):
            start, end = token["start"], token["end"]
            continues_previous = bool(
                spans
                and token["word"].startswith("##")
                and start == spans[-1]["end"]
            )
            if continues_previous:
                spans[-1]["end"] = end
            else:
                # A merged span keeps the label and score of its first piece.
                spans.append(
                    {
                        "start": start,
                        "end": end,
                        "entity": token["entity"],
                        "score": token["score"],
                    }
                )

    pieces = []
    highlighted_words = set()
    cursor = 0
    for span in spans:
        start, end = span["start"], span["end"]
        # Slice the original text rather than trusting the tokenizer's
        # `word`, so the highlighted text is exactly what the user pasted.
        surface = text[start:end]
        if not surface:
            continue
        highlighted_words.add(surface.lower())

        # The posting is user-supplied and ends up in an HTML component, so
        # everything interpolated into the markup is escaped.
        entity_label = html.escape(str(span["entity"]))
        score = span["score"]
        pieces.append(html.escape(text[cursor:start]))
        pieces.append(
            '<span style="background-color:yellow; font-weight:bold;" '
            f'title="Entity: {entity_label} (Score: {score:.2f})">'
            f"{html.escape(surface)}</span>"
        )
        cursor = end

    # Text after the last highlighted span.
    pieces.append(html.escape(text[cursor:]))

    # Replace newline characters with <br> to preserve line breaks.
    body = "".join(pieces).replace("\n", "<br>")
    return (
        f'<div style="font-family:Arial, sans-serif; line-height:1.5;">{body}</div>',
        highlighted_words,
    )
def check_keywords_in_resume(resume_file_path, job_posting_text):
    """Compare keywords highlighted in a job posting against a PDF resume.

    Args:
        resume_file_path: Filesystem path of the uploaded resume PDF.
        job_posting_text: Raw text of the job posting.

    Returns:
        A ``(highlighted_html, matched_summary, missing_summary)`` tuple: the
        highlighted posting markup, then two strings listing the highlighted
        keywords found in, and absent from, the resume (alphabetical order).

    Raises:
        ValueError: If no resume path was supplied.
    """
    # Fail with a clear message instead of the opaque TypeError that
    # open(None) would raise when the user submits without uploading a file.
    if not resume_file_path:
        raise ValueError("No resume PDF was provided.")

    # Extract text from the uploaded PDF resume. Pages are read while the
    # file is still open.
    with open(resume_file_path, "rb") as file:
        pdf_reader = PyPDF2.PdfReader(file)
        # `or ""` is defensive: it keeps the join safe should a page yield
        # no text object (e.g. an image-only page on some library versions).
        resume_text = " ".join(
            page.extract_text() or "" for page in pdf_reader.pages
        )

    # Extract highlighted keywords from the job posting.
    highlighted_html, highlighted_words = extract_keywords_with_highlights(
        job_posting_text
    )

    # Index each resume token both as written and with surrounding
    # punctuation removed, so "python," or "(sql)" still match "python" and
    # "sql" while tokens such as "c++" or "node.js" keep their raw form.
    edge_punctuation = ".,;:!?()[]{}<>\"'"
    resume_words = set()
    for raw_token in resume_text.lower().split():
        resume_words.add(raw_token)
        resume_words.add(raw_token.strip(edge_punctuation))
    resume_words.discard("")

    matched_words = highlighted_words & resume_words
    missing_words = highlighted_words - matched_words

    # Sort so the summaries are stable between runs (set order is arbitrary).
    matched_summary = f"Matched Keywords: {', '.join(sorted(matched_words))}"
    missing_summary = f"Missing Keywords: {', '.join(sorted(missing_words))}"
    return highlighted_html, matched_summary, missing_summary
# Gradio UI: a PDF upload plus a job-posting text box go in; the highlighted
# posting and the matched/missing keyword summaries come out.
input_components = [
    gr.File(label="Upload Resume PDF", type="filepath"),
    gr.Textbox(
        label="Enter Job Posting Text",
        lines=30,
        placeholder="Paste job posting text here...",
    ),
]
output_components = [
    gr.HTML(label="Highlighted Key Skills/Words in Job Posting"),
    gr.Textbox(label="Matched Keywords"),
    gr.Textbox(label="Missing Keywords"),
]
interface = gr.Interface(
    fn=check_keywords_in_resume,
    inputs=input_components,
    outputs=output_components,
    title="Resume vs Job Posting Skill Match with Highlights",
    description="Upload your resume and enter a job posting. The app will highlight key skills from the job posting and check if they are present in your resume.",
)

# Start serving the app as soon as the module is executed.
interface.launch()