Spaces:

CR7CAD
/

ISOM5240FinalProject

Sleeping

App Files Files Community

ISOM5240FinalProject / app.py

CR7CAD

Update app.py

d836318 verified 4 months ago

raw

history blame

5.32 kB

	import os
	import tempfile
	import re
	import streamlit as st
	import docx
	import textract

	#####################################
	# Function: Extract Text from File
	#####################################
	def extract_text_from_file(file_obj):
	    """
	    Extract text from an uploaded .doc or .docx file.

	    Args:
	        file_obj: File-like object exposing a ``name`` attribute. For .doc
	            input it must also provide ``read()``; for .docx input it is
	            handed directly to ``docx.Document``.

	    Returns:
	        The extracted text. Failures are reported in the return value
	        rather than raised: an unsupported extension yields
	        "Unsupported file type." and a processing failure yields a
	        message starting with "Error processing".
	    """
	    filename = file_obj.name
	    ext = os.path.splitext(filename)[1].lower()

	    if ext == ".docx":
	        try:
	            document = docx.Document(file_obj)
	            return "\n".join(para.text for para in document.paragraphs)
	        except Exception as e:
	            return f"Error processing DOCX file: {e}"

	    if ext == ".doc":
	        # Stays None until a temporary file exists, so the cleanup below
	        # never touches an unbound name.
	        tmp_filename = None
	        try:
	            # textract requires a filename, so create a temporary file.
	            with tempfile.NamedTemporaryFile(delete=False, suffix=".doc") as tmp:
	                # Record the path first: if the write fails, the file is
	                # still removed in the finally clause.
	                tmp_filename = tmp.name
	                tmp.write(file_obj.read())
	            # Leaving the with-block flushes and closes the handle before
	            # textract opens the path.
	            return textract.process(tmp_filename).decode("utf-8")
	        except Exception as e:
	            return f"Error processing DOC file: {e}"
	        finally:
	            if tmp_filename is not None:
	                try:
	                    os.remove(tmp_filename)
	                except OSError:
	                    # Best-effort cleanup; a leftover temp file must not
	                    # replace the extraction result.
	                    pass

	    return "Unsupported file type."

	#####################################
	# Function: Extract Basic Resume Information
	#####################################
	def extract_basic_resume_info(text):
	    """
	    Parse resume text for a few basic facts using regular expressions.

	    Args:
	        text: Plain text of the resume. ``None`` is treated as empty text.

	    Returns:
	        A dict with the keys "Name", "Age", "Work Experience" and
	        "Expected Industry/Direction". Each value is the matched string,
	        or None when nothing matched.
	    """
	    text = text or ""
	    info = {
	        "Name": None,
	        "Age": None,
	        "Work Experience": None,
	        "Expected Industry/Direction": None,
	    }

	    # Extract Name (e.g., "Name: John Doe").
	    # \b keeps "Filename:" / "Username:" from matching, and the value is
	    # limited to letters, spaces and tabs so it stops at the end of the line.
	    name_match = re.search(r"\b[Nn]ame[:\-][ \t]*([A-Za-z][A-Za-z \t]*)", text)
	    if name_match:
	        info["Name"] = name_match.group(1).strip()
	    else:
	        # Heuristic: two or three capitalized words on the same line.
	        potential_names = re.findall(
	            r"\b[A-Z][a-z]+(?:[ \t]+[A-Z][a-z]+){1,2}\b", text
	        )
	        if potential_names:
	            info["Name"] = potential_names[0]

	    # Extract Age (e.g., "Age: 28").
	    # \b avoids matching inside words such as "Page"; (?!\d) avoids taking
	    # the first two digits of a longer number.
	    age_match = re.search(r"\b[Aa]ge[:\-]\s*(\d{1,2})(?!\d)", text)
	    if age_match:
	        info["Age"] = age_match.group(1)

	    # Extract Work Experience (e.g., "5 years of experience").
	    # The alternation uses an unescaped "|"; an escaped "\|" would only
	    # match a literal pipe character.
	    exp_match = re.search(
	        r"(\d+)\s+(years?|yrs?)\s+(?:of\s+)?experience", text, re.IGNORECASE
	    )
	    if exp_match:
	        info["Work Experience"] = f"{exp_match.group(1)} {exp_match.group(2)}"
	    else:
	        # Fallback: take the rest of a line labelled "Experience:" or
	        # "Background:".
	        exp_line = re.search(
	            r"(Experience|Background)[:\-]\s*(.+)", text, re.IGNORECASE
	        )
	        if exp_line:
	            info["Work Experience"] = exp_line.group(2).strip()

	    # Extract Expected Industry/Direction from a labelled line.
	    industry_match = re.search(
	        r"(Industry|Interest|Direction)[:\-]\s*(.+)", text, re.IGNORECASE
	    )
	    if industry_match:
	        info["Expected Industry/Direction"] = industry_match.group(2).strip()

	    return info

	#####################################
	# Function: Summarize Basic Info into a Paragraph
	#####################################
	def summarize_basic_info(info):
	    """
	    Build a one-sentence summary from the extracted resume fields.

	    Fields that are missing or empty are skipped. When no field is
	    available, a fixed fallback sentence is returned instead.
	    """
	    name = info.get("Name")
	    age = info.get("Age")
	    experience = info.get("Work Experience")
	    direction = info.get("Expected Industry/Direction")

	    # Fragments are collected in a fixed order: name, age, experience,
	    # direction.
	    fragments = []
	    if name:
	        fragments.append(f"{name}")
	    if age:
	        fragments.append(f"aged {age}")
	    if experience:
	        fragments.append(f"with {experience} of work experience")
	    if direction:
	        fragments.append(f"seeking opportunities in {direction}")

	    if not fragments:
	        return "Basic information could not be extracted from the resume."
	    return "The candidate is " + ", ".join(fragments) + "."

	#####################################
	# Main Resume Processing Logic
	#####################################
	def process_resume(file_obj):
	    """
	    Run the full pipeline for one uploaded resume.

	    Returns a ``(resume_text, summary_paragraph)`` tuple, or
	    ``(None, None)`` when no file was supplied.
	    """
	    if file_obj is not None:
	        # Text extraction first, then field parsing, then the summary.
	        extracted = extract_text_from_file(file_obj)
	        fields = extract_basic_resume_info(extracted)
	        return extracted, summarize_basic_info(fields)

	    return None, None

	#####################################
	# Streamlit Interface
	#####################################
	# Page header and usage instructions.
	st.title("Resume Basic Info Summary")
	st.markdown("""
	Upload your resume file in .doc or .docx format. The app will extract the content and generate a summary paragraph
	that highlights the candidate's basic information (name, age, work experience, and expected industry/direction).
	""")

	# Only .doc and .docx uploads are offered by the uploader widget.
	uploaded_file = st.file_uploader("Upload Resume", type=["doc", "docx"])

	# Processing happens only when the button is pressed.
	if st.button("Process Resume"):
	    if uploaded_file is not None:
	        with st.spinner("Processing resume..."):
	            resume_text, summary_paragraph = process_resume(uploaded_file)

	        st.subheader("Summary of Basic Information")
	        st.markdown(summary_paragraph)
	    else:
	        st.error("Please upload a file first.")