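"""Resume vs. job posting skill matcher (Gradio app).

Tags skill/knowledge spans in a pasted job posting with the
jjzha/jobbert_knowledge_extraction token-classification model, renders them as
highlighted HTML, and reports which of those keywords appear in an uploaded
PDF resume.

Assumed dependencies (not pinned here): gradio, PyPDF2, and transformers with
a backend such as PyTorch.
"""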
import gradio as gr
from transformers import pipeline
import PyPDF2

# Load the token classification pipeline
model_name = "jjzha/jobbert_knowledge_extraction"
pipe = pipeline("token-classification", model=model_name)

# Function to extract and highlight key skills/words from the job posting
def extract_keywords_with_highlights(job_posting_text):
    results = pipe(job_posting_text)

    # The tokenizer emits subword pieces prefixed with `##`; merge adjacent pieces
    # back into full words via their character offsets so both the highlights and
    # the keyword set contain complete words rather than fragments.
    spans = []
    for result in results:
        start, end = result["start"], result["end"]
        if spans and result["word"].startswith("##") and start == spans[-1]["end"]:
            spans[-1]["end"] = end
        else:
            spans.append(
                {"start": start, "end": end, "entity": result["entity"], "score": result["score"]}
            )

    reconstructed_text = ""
    highlighted_words = set()
    previous_end = 0
    for span in spans:
        start, end = span["start"], span["end"]
        word = job_posting_text[start:end]
        highlighted_words.add(word.lower())
        # Add the plain text between the previous highlight and this one
        reconstructed_text += job_posting_text[previous_end:start]
        # Wrap the word in a highlighted span; entity and score show on hover
        reconstructed_text += (
            f'<span style="background-color:yellow; font-weight:bold;" '
            f'title="Entity: {span["entity"]} (Score: {span["score"]:.2f})">'
            f"{word}</span>"
        )
        previous_end = end

    # Add the remaining text and preserve line breaks in the HTML output
    reconstructed_text += job_posting_text[previous_end:]
    reconstructed_text = reconstructed_text.replace("\n", "<br>")

    return (
        f'<div style="font-family:Arial, sans-serif; line-height:1.5;">{reconstructed_text}</div>',
        highlighted_words,
    )

# Function to check if highlighted words are in the resume
def check_keywords_in_resume(resume_file_path, job_posting_text):
    # Extract text from the uploaded PDF resume
    with open(resume_file_path, "rb") as file:
        pdf_reader = PyPDF2.PdfReader(file)
        resume_text = " ".join((page.extract_text() or "") for page in pdf_reader.pages)

    # Extract highlighted keywords from the job posting
    highlighted_html, highlighted_words = extract_keywords_with_highlights(job_posting_text)

    # Check which highlighted keywords appear in the resume; compare lowercase
    # whole words and strip common punctuation so tokens like "Python," still match
    resume_words = {word.strip(".,;:()[]") for word in resume_text.lower().split()}
    matched_words = highlighted_words.intersection(resume_words)
    missing_words = highlighted_words - matched_words

    # Prepare alphabetically sorted summaries so the output order is stable
    matched_summary = f"Matched Keywords: {', '.join(sorted(matched_words))}"
    missing_summary = f"Missing Keywords: {', '.join(sorted(missing_words))}"
    return highlighted_html, matched_summary, missing_summary

# Set up Gradio interface
interface = gr.Interface(
    fn=check_keywords_in_resume,
    inputs=[
        gr.File(label="Upload Resume PDF", type="filepath"),
        gr.Textbox(label="Enter Job Posting Text", lines=30, placeholder="Paste job posting text here..."),
    ],
    outputs=[
        gr.HTML(label="Highlighted Key Skills/Words in Job Posting"),
        gr.Textbox(label="Matched Keywords"),
        gr.Textbox(label="Missing Keywords"),
    ],
    title="Resume vs Job Posting Skill Match with Highlights",
    description="Upload your resume and enter a job posting. The app will highlight key skills from the job posting and check if they are present in your resume.",
)

# Launch the Gradio app
interface.launch()