"""Gradio app: highlight key skills in a job posting and check a resume for them."""

import html
import string

import gradio as gr
import PyPDF2
from transformers import pipeline

# Load the token classification pipeline once at import time so that every
# request reuses the same model instance.
model_name = "jjzha/jobbert_knowledge_extraction"
pipe = pipeline("token-classification", model=model_name)


def _merge_token_spans(results):
    """Merge touching token predictions into ``(start, end)`` character spans.

    The pipeline reports one entry per token, so a word that was split into
    subword pieces (``##...``) arrives as several entries whose character
    ranges touch. Joining those ranges yields the full word as it appears in
    the input text instead of isolated fragments.

    Args:
        results: Iterable of dicts carrying integer ``start`` and ``end``
            character offsets into the analysed text.

    Returns:
        A list of non-overlapping ``(start, end)`` tuples sorted by position.
    """
    spans = []
    for result in sorted(results, key=lambda item: item["start"]):
        start, end = result["start"], result["end"]
        if spans and start <= spans[-1][1]:
            # Touches or overlaps the previous span: extend it.
            spans[-1][1] = max(spans[-1][1], end)
        else:
            spans.append([start, end])
    return [(start, end) for start, end in spans]


def _resume_vocabulary(resume_text):
    """Return the set of lower-cased words found in ``resume_text``.

    Each whitespace-separated token is stored both verbatim and with
    surrounding punctuation stripped, so that "Python," matches "python"
    while a token such as "c++" is still available unchanged.
    """
    vocabulary = set()
    for token in resume_text.lower().split():
        vocabulary.add(token)
        vocabulary.add(token.strip(string.punctuation))
    vocabulary.discard("")
    return vocabulary


# Function to extract and highlight key skills/words from the job posting
def extract_keywords_with_highlights(job_posting_text):
    """Highlight the skills the model detects in a job posting.

    Args:
        job_posting_text: Raw job posting text entered by the user.

    Returns:
        A tuple ``(highlighted_html, highlighted_words)`` where
        ``highlighted_html`` is the posting rendered as an HTML ``<div>`` with
        every detected keyword wrapped in ``<mark>`` and line breaks turned
        into ``<br>``, and ``highlighted_words`` is the set of detected
        keywords in lower case.
    """
    # Nothing to analyse: skip the model call entirely.
    if not job_posting_text or not job_posting_text.strip():
        return "<div></div>", set()

    spans = _merge_token_spans(pipe(job_posting_text))

    parts = []
    highlighted_words = set()
    previous_end = 0
    for start, end in spans:
        # Slice the original text rather than using the token's `word`, so
        # that `##` subword markers never leak into keywords or output.
        keyword = job_posting_text[start:end]
        highlighted_words.add(keyword.lower())

        # The posting is user-supplied, so every piece is escaped before it
        # is embedded in HTML.
        parts.append(html.escape(job_posting_text[previous_end:start]))
        parts.append(f"<mark>{html.escape(keyword)}</mark>")
        previous_end = end

    # Add the remaining text after the last keyword.
    parts.append(html.escape(job_posting_text[previous_end:]))

    # Replace newline characters with <br> to preserve line breaks.
    reconstructed_text = "".join(parts).replace("\n", "<br>")

    return f"<div>{reconstructed_text}</div>", highlighted_words


# Function to check if highlighted words are in the resume
def check_keywords_in_resume(resume_file_path, job_posting_text):
    """Compare the keywords of a job posting against a PDF resume.

    Args:
        resume_file_path: Filesystem path of the uploaded resume PDF.
        job_posting_text: Raw job posting text entered by the user.

    Returns:
        A tuple ``(highlighted_html, matched_summary, missing_summary)``: the
        highlighted posting followed by two one-line summaries listing the
        keywords that were and were not found in the resume.

    Raises:
        gr.Error: If no resume file was provided.
    """
    if not resume_file_path:
        raise gr.Error("Please upload a resume PDF before running the check.")

    # Extract text from the uploaded PDF resume. A page without extractable
    # text contributes an empty string instead of breaking the join.
    with open(resume_file_path, "rb") as file:
        pdf_reader = PyPDF2.PdfReader(file)
        resume_text = " ".join(
            page.extract_text() or "" for page in pdf_reader.pages
        )

    # Extract highlighted keywords from the job posting
    highlighted_html, highlighted_words = extract_keywords_with_highlights(
        job_posting_text
    )

    # Check if each highlighted word is in the resume
    resume_words = _resume_vocabulary(resume_text)
    matched_words = highlighted_words & resume_words
    missing_words = highlighted_words - matched_words

    # Prepare a summary; sorting keeps the output stable between runs.
    matched_summary = f"Matched Keywords: {', '.join(sorted(matched_words))}"
    missing_summary = f"Missing Keywords: {', '.join(sorted(missing_words))}"

    return highlighted_html, matched_summary, missing_summary


# Set up Gradio interface
interface = gr.Interface(
    fn=check_keywords_in_resume,
    inputs=[
        gr.File(label="Upload Resume PDF", type="filepath"),
        gr.Textbox(
            label="Enter Job Posting Text",
            lines=30,
            placeholder="Paste job posting text here...",
        ),
    ],
    outputs=[
        gr.HTML(label="Highlighted Key Skills/Words in Job Posting"),
        gr.Textbox(label="Matched Keywords"),
        gr.Textbox(label="Missing Keywords"),
    ],
    title="Resume vs Job Posting Skill Match with Highlights",
    description="Upload your resume and enter a job posting. The app will highlight key skills from the job posting and check if they are present in your resume.",
)

# Launch the Gradio app
interface.launch()