Spaces:

IAT360Group29
/

final-project

Sleeping

App Files Files Community

final-project / app.py

kylezhao101

update app.py

09b06d4 4 months ago

raw

history blame

3.2 kB

	import gradio as gr
	from transformers import pipeline
	import PyPDF2

	# Build the token-classification pipeline once, at import time, so the
	# loaded model is shared by every call that uses `pipe`.
	model_name = "jjzha/jobbert_knowledge_extraction"
	pipe = pipeline(task="token-classification", model=model_name)

	# Function to extract and highlight key skills/words from the job posting
	def extract_keywords_with_highlights(job_posting_text):
	    """Highlight the model-tagged words of a job posting as HTML.

	    The text is run through the module-level token-classification ``pipe``.
	    Word-piece fragments (tokens whose ``word`` starts with ``##``) that
	    start exactly where the previous tagged token ended are merged into one
	    span, so a word split by the tokenizer is highlighted and reported once
	    instead of as several fragments.

	    Args:
	        job_posting_text: Raw job posting text. ``None`` or an empty string
	            yields an empty result without calling the model.

	    Returns:
	        A ``(html, keywords)`` tuple. ``html`` is a ``<div>`` holding the
	        HTML-escaped posting, with every tagged span wrapped in a
	        highlighted ``<span>`` and newlines converted to ``<br>``.
	        ``keywords`` is the set of lower-cased highlighted words.
	    """
	    import html  # local import: only this function needs it

	    job_posting_text = job_posting_text or ""
	    results = pipe(job_posting_text) if job_posting_text else []

	    # Merge word-piece continuations into the span they extend.
	    # Each entry is [start, end, entity, score]; a merged span keeps the
	    # entity label and score of its first piece.
	    spans = []
	    for result in results:
	        start, end = result["start"], result["end"]
	        continues_previous = (
	            spans
	            and result["word"].startswith("##")
	            and start == spans[-1][1]
	        )
	        if continues_previous:
	            spans[-1][1] = end
	        else:
	            spans.append([start, end, result["entity"], result["score"]])

	    pieces = []
	    highlighted_words = set()
	    previous_end = 0
	    for start, end, entity, score in spans:
	        # Use the source slice rather than the tokenizer's `word` so the
	        # displayed text is exactly what the user pasted.
	        surface = job_posting_text[start:end]
	        highlighted_words.add(surface.lower())
	        # The posting is untrusted user input: escape everything that is
	        # interpolated into the markup.
	        pieces.append(html.escape(job_posting_text[previous_end:start]))
	        pieces.append(
	            '<span style="background-color:yellow; font-weight:bold;" '
	            f'title="Entity: {html.escape(str(entity))} (Score: {score:.2f})">'
	            f"{html.escape(surface)}</span>"
	        )
	        previous_end = end
	    # Add the remaining text after the last highlighted span
	    pieces.append(html.escape(job_posting_text[previous_end:]))

	    # Replace newline characters with <br> to preserve line breaks
	    reconstructed_text = "".join(pieces).replace("\n", "<br>")

	    return (
	        f'<div style="font-family:Arial, sans-serif; line-height:1.5;">{reconstructed_text}</div>',
	        highlighted_words,
	    )

	# Function to check if highlighted words are in the resume
	def check_keywords_in_resume(resume_file_path, job_posting_text):
	    """Report which highlighted job-posting keywords occur in a PDF resume.

	    Args:
	        resume_file_path: Filesystem path of the uploaded PDF resume.
	        job_posting_text: Job posting text, passed on to
	            ``extract_keywords_with_highlights``.

	    Returns:
	        A ``(highlighted_html, matched_summary, missing_summary)`` tuple:
	        the highlighted posting HTML, then two one-line summaries listing
	        the keywords found in / absent from the resume, alphabetically.

	    Raises:
	        gr.Error: If no resume file was provided.
	    """
	    if not resume_file_path:
	        # Without this guard open(None) fails with an opaque TypeError.
	        raise gr.Error("Please upload a resume PDF before submitting.")

	    # Extract text from the uploaded PDF resume (pages must be read while
	    # the file handle is still open).
	    with open(resume_file_path, "rb") as file:
	        pdf_reader = PyPDF2.PdfReader(file)
	        # Defensive: treat a page that yields no text as an empty string.
	        resume_text = " ".join(
	            page.extract_text() or "" for page in pdf_reader.pages
	        )

	    # Extract highlighted keywords from the job posting
	    highlighted_html, highlighted_words = extract_keywords_with_highlights(job_posting_text)

	    # Index every whitespace-separated token both verbatim and with
	    # sentence punctuation trimmed from its edges, so "python," or "(sql)"
	    # still match while tokens such as "c++" or ".net" stay available.
	    edge_punctuation = ".,;:!?()[]{}\"'"
	    resume_words = set()
	    for token in resume_text.lower().split():
	        resume_words.add(token)
	        resume_words.add(token.strip(edge_punctuation))

	    matched_words = highlighted_words.intersection(resume_words)
	    missing_words = highlighted_words - matched_words

	    # Sort so the summaries are stable from run to run (set order is not).
	    matched_summary = f"Matched Keywords: {', '.join(sorted(matched_words))}"
	    missing_summary = f"Missing Keywords: {', '.join(sorted(missing_words))}"
	    return highlighted_html, matched_summary, missing_summary

	# Assemble the Gradio UI: a PDF upload and a job-posting text box feed
	# check_keywords_in_resume, whose three return values fill the HTML panel
	# and the two keyword text boxes.
	interface = gr.Interface(
	    title="Resume vs Job Posting Skill Match with Highlights",
	    description="Upload your resume and enter a job posting. The app will highlight key skills from the job posting and check if they are present in your resume.",
	    fn=check_keywords_in_resume,
	    inputs=[
	        gr.File(type="filepath", label="Upload Resume PDF"),
	        gr.Textbox(
	            placeholder="Paste job posting text here...",
	            lines=30,
	            label="Enter Job Posting Text",
	        ),
	    ],
	    outputs=[
	        gr.HTML(label="Highlighted Key Skills/Words in Job Posting"),
	        gr.Textbox(label="Matched Keywords"),
	        gr.Textbox(label="Missing Keywords"),
	    ],
	)

	# Start serving the app
	interface.launch()