Spaces:

CR7CAD
/

ISOM5240FinalProject

Sleeping

App Files Files Community

ISOM5240FinalProject / app.py

CR7CAD

Update app.py

0d4f4dd verified 4 months ago

raw

history blame

7.52 kB

	import os
	import tempfile
	import re
	import time
	import streamlit as st
	import docx
	import textract
	from sentence_transformers import SentenceTransformer, util
	from transformers import pipeline

	#####################################
	# Function: Extract Text from File
	#####################################
	def extract_text_from_file(file_obj):
	    """
	    Extract text from an uploaded .doc or .docx file.

	    Args:
	        file_obj: A binary file-like object exposing a ``name`` attribute
	            (used only to determine the extension) and ``read()``.

	    Returns:
	        The extracted text on success. On failure the function does not
	        raise; it returns a message string instead: one starting with
	        "Error processing DOCX file:" or "Error processing DOC file:", or
	        exactly "Unsupported file type." for any other extension.
	    """
	    ext = os.path.splitext(file_obj.name)[1].lower()

	    # The same upload object may already have been read (e.g. the user
	    # presses the button twice); rewind so we always parse from the start.
	    seekable = getattr(file_obj, "seekable", None)
	    if callable(seekable) and seekable():
	        file_obj.seek(0)

	    if ext == ".docx":
	        try:
	            document = docx.Document(file_obj)
	            return "\n".join(para.text for para in document.paragraphs)
	        except Exception as e:
	            return f"Error processing DOCX file: {e}"

	    if ext == ".doc":
	        # Stays None until the temp file actually exists, so cleanup never
	        # touches an unbound name.
	        tmp_filename = None
	        try:
	            # textract requires a file name; save the file temporarily.
	            with tempfile.NamedTemporaryFile(delete=False, suffix=".doc") as tmp:
	                tmp_filename = tmp.name
	                tmp.write(file_obj.read())
	            # The handle is closed (and flushed) here, before textract
	            # opens the path itself.
	            return textract.process(tmp_filename).decode("utf-8")
	        except Exception as e:
	            return f"Error processing DOC file: {e}"
	        finally:
	            if tmp_filename is not None:
	                try:
	                    os.remove(tmp_filename)
	                except OSError:
	                    # Best-effort cleanup; a leftover temp file must not
	                    # mask the extraction result.
	                    pass

	    return "Unsupported file type."

	#####################################
	# Function: Summarize Resume Text using a Transformer Model
	#####################################
	@st.cache_resource(show_spinner=False)
	def load_summarizer():
	    """
	    Build the summarization pipeline used for resume summaries.

	    The pipeline is created for the "summarization" task with the
	    "google/pegasus-xsum" model. The function is wrapped in
	    ``st.cache_resource`` with its spinner disabled.
	    """
	    summarization_pipeline = pipeline(task="summarization", model="google/pegasus-xsum")
	    return summarization_pipeline

	def summarize_resume_text(resume_text):
	    """
	    Generate a concise summary of the resume text with the summarization model.

	    The text is first capped at 1024 characters (with an info message shown
	    to the user). Because the model's real limit is counted in tokens, not
	    characters, the pipeline is additionally asked to truncate at the
	    tokenizer level so dense input cannot overflow the model.

	    Args:
	        resume_text: Plain text extracted from the resume.

	    Returns:
	        The ``summary_text`` of the first result returned by the pipeline.
	    """
	    summarizer = load_summarizer()

	    # Trim resume_text if it's too long
	    max_input_length = 1024  # characters; adjust as needed
	    if len(resume_text) > max_input_length:
	        st.info(f"Resume text is longer than {max_input_length} characters. Trimming text for summarization...")
	        resume_text = resume_text[:max_input_length]

	    # Generate summary. truncation=True guards against inputs whose token
	    # count still exceeds the model limit after the character cap.
	    summary_result = summarizer(
	        resume_text,
	        max_length=150,
	        min_length=40,
	        do_sample=False,
	        truncation=True,
	    )
	    return summary_result[0]['summary_text']

	#####################################
	# Function: Compare Candidate Summary to Company Prompt
	#####################################
	def compute_suitability(candidate_summary, company_prompt, model):
	    """
	    Score how closely a candidate summary matches a company prompt.

	    Both texts are embedded with ``model.encode(..., convert_to_tensor=True)``
	    and compared with ``util.cos_sim``. Cosine similarity itself can be
	    negative, so the value is clamped to honour the [0, 1] range that this
	    function documents and that the UI reports.

	    Args:
	        candidate_summary: Summary text describing the candidate.
	        company_prompt: Text describing the company.
	        model: Object providing ``encode(text, convert_to_tensor=True)``.

	    Returns:
	        A float in the range [0, 1].
	    """
	    candidate_embed = model.encode(candidate_summary, convert_to_tensor=True)
	    company_embed = model.encode(company_prompt, convert_to_tensor=True)
	    raw_similarity = float(util.cos_sim(candidate_embed, company_embed).item())
	    # Clamp: dissimilar texts can yield a negative cosine, and rounding can
	    # push identical texts marginally above 1.
	    return min(1.0, max(0.0, raw_similarity))

	#####################################
	# Main Resume Processing Logic
	#####################################
	def process_resume(file_obj):
	    """
	    Extract text from the uploaded file and summarize it.

	    Progress is reported through ``st.info`` messages; problems are reported
	    through ``st.error``.

	    Args:
	        file_obj: The uploaded resume, passed to ``extract_text_from_file``.

	    Returns:
	        The candidate summary, or "" when no usable text could be extracted.
	    """
	    st.info("Extracting text from resume...")
	    resume_text = extract_text_from_file(file_obj)

	    # Check if resume_text is valid
	    if not resume_text or not resume_text.strip():
	        st.error("No text could be extracted. Please check your resume file!")
	        return ""

	    # extract_text_from_file reports failures in-band as message strings;
	    # surface them as errors instead of summarizing the error message as if
	    # it were the resume.
	    failure_markers = (
	        "Error processing DOCX file:",
	        "Error processing DOC file:",
	        "Unsupported file type.",
	    )
	    if resume_text.startswith(failure_markers):
	        st.error(resume_text)
	        return ""

	    st.info(f"Text extraction complete. Extracted {len(resume_text)} characters.")
	    time.sleep(0.5)  # slight delay to let the user read the info message

	    st.info("Generating candidate summary, please wait...")
	    candidate_summary = summarize_resume_text(resume_text)
	    st.info("Candidate summary generated.")
	    return candidate_summary

	#####################################
	# Load the Sentence-BERT Model (Semantic Similarity Model)
	#####################################
	@st.cache_resource(show_spinner=False)
	def load_sbert_model():
	    """Create the Sentence-BERT model "all-MiniLM-L6-v2" used for similarity scoring."""
	    model_name = "sentence-transformers/all-MiniLM-L6-v2"
	    return SentenceTransformer(model_name)

	# Module-level Sentence-BERT instance shared by the suitability computation.
	sbert_model = load_sbert_model()

	#####################################
	# Streamlit Interface
	#####################################
	st.title("Resume Analyzer and Company Suitability Checker")

	# Introductory text describing the three steps the app performs.
	intro_markdown = """
	Upload your resume file in .doc or .docx format. The app performs the following tasks:
	1. Extracts text from the resume.
	2. Uses a transformer-based text summarization model (google/pegasus-xsum) to generate a concise candidate summary.
	3. Compares the candidate summary with a company profile (using Sentence-BERT) to produce a suitability score.
	"""
	st.markdown(intro_markdown)

	# Resume upload widget; only .doc and .docx files are accepted.
	accepted_extensions = ["doc", "docx"]
	uploaded_file = st.file_uploader("Upload Resume", type=accepted_extensions)

	# Button to process the resume and store the summary in session state.
	if st.button("Process Resume"):
	    if uploaded_file is None:
	        st.error("Please upload a resume file first.")
	    else:
	        with st.spinner("Processing resume..."):
	            candidate_summary = process_resume(uploaded_file)
	        if candidate_summary:  # only if summary is generated
	            st.session_state["candidate_summary"] = candidate_summary
	            st.subheader("Candidate Summary")
	            st.markdown(candidate_summary)
	        elif "candidate_summary" in st.session_state:
	            # process_resume returned "" (failure). Drop any summary kept
	            # from an earlier upload so a later suitability score cannot be
	            # computed against the wrong resume.
	            del st.session_state["candidate_summary"]

	# Default company profile (Google LLC) used to pre-fill the text area below.
	default_company_prompt = (
	    "Google LLC, a global leader in technology and innovation, specializes in internet services, cloud computing, "
	    "artificial intelligence, and software development. As part of Alphabet Inc., Google seeks candidates with strong "
	    "problem-solving skills, adaptability, and collaboration abilities. Technical roles require proficiency in programming "
	    "languages such as Python, Java, C++, Go, or JavaScript, with expertise in data structures, algorithms, and system design. "
	    "Additionally, skills in AI, cybersecurity, UX/UI design, and digital marketing are highly valued. Google fosters a culture "
	    "of innovation, expecting candidates to demonstrate creativity, analytical thinking, and a passion for cutting-edge technology."
	)

	# Editable company description; its current value is what gets scored.
	company_prompt = st.text_area(
	    label="Enter company details:",
	    value=default_company_prompt,
	    height=150,
	)

	# Score the stored candidate summary against the company text on request.
	if st.button("Compute Suitability Score"):
	    if "candidate_summary" in st.session_state:
	        candidate_summary = st.session_state["candidate_summary"]
	        if not candidate_summary.strip():
	            st.error("Candidate summary is empty; please check your resume file.")
	        elif not company_prompt.strip():
	            st.error("Please enter the company information.")
	        else:
	            with st.spinner("Computing suitability score..."):
	                score = compute_suitability(candidate_summary, company_prompt, sbert_model)
	            st.success(f"Suitability Score: {score:.2f} (range 0 to 1)")
	    else:
	        # Nothing has been stored yet by the "Process Resume" step.
	        st.error("Please process the resume first!")