import os
import io
import streamlit as st
import docx
import docx2txt
import tempfile
import time
import re
import concurrent.futures
from functools import lru_cache
from transformers import pipeline

# ... [Keep your existing configurations and constants] ...
#####################################
# Preload Models - Optimized with DistilBART
#####################################
@st.cache_resource
def load_models():
    """Load optimized models once and cache them across Streamlit reruns."""
    with st.spinner("Loading AI models..."):
        # Use a smaller, faster distilled checkpoint (public DistilBART summarization
        # model); generation limits such as max_length and truncation are applied
        # at call time in summarize_resume_text.
        models = {
            'summarizer': pipeline(
                "summarization",
                model="sshleifer/distilbart-cnn-12-6"  # DistilBART, faster than full BART
            )
        }
    return models

models = load_models()
#####################################
# Function: Extract Text from File - Optimized
#####################################
def extract_text_from_file(file_obj):
    """Optimized text extraction with early exit"""
    filename = file_obj.name
    ext = os.path.splitext(filename)[1].lower()
    text = ""
    MAX_TEXT = 15000  # Reduced processing limit

    try:
        if ext == ".docx":
            doc = docx.Document(file_obj)
            # Only process the first 50 paragraphs (roughly 10 pages)
            text = "\n".join(para.text for para in doc.paragraphs[:50] if para.text.strip())[:MAX_TEXT]
        elif ext == ".doc":
            # docx2txt expects a file path, so write the upload to a temporary file first.
            # Note: docx2txt targets .docx; a legacy binary .doc may need a separate
            # converter (e.g. antiword) if this step fails.
            with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp:
                tmp.write(file_obj.read())
                tmp_path = tmp.name
            text = docx2txt.process(tmp_path)[:MAX_TEXT]
            os.remove(tmp_path)
        elif ext == ".txt":
            text = file_obj.read().decode("utf-8", errors="ignore")[:MAX_TEXT]
    except Exception as e:
        text = f"Error: {str(e)}"
    return text
# ... [Keep your existing extraction functions] ...
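#####################################
# Illustrative stand-ins (assumptions, NOT the original implementations).
# Minimal regex/keyword sketches of the helpers that summarize_resume_text
# expects, so this listing runs end-to-end. Delete these and keep your
# existing extraction functions if you already have them.
#####################################
def extract_name(text):
    """Heuristic sketch: take the first short non-empty line as the name."""
    for line in text.splitlines():
        line = line.strip()
        if line and len(line.split()) <= 5:
            return line
    return "Not found"

def extract_age(text):
    """Heuristic sketch: look for an explicit 'Age: NN' style mention."""
    match = re.search(r"\bage\s*[:\-]?\s*(\d{2})\b", text, re.IGNORECASE)
    return match.group(1) if match else "Not specified"

def extract_industry(text, summary):
    """Heuristic sketch: rough keyword-based industry guess."""
    keywords = {
        "software": "Technology", "engineer": "Technology",
        "finance": "Finance", "marketing": "Marketing",
        "nurse": "Healthcare", "teacher": "Education",
    }
    haystack = (text + " " + summary).lower()
    for word, industry in keywords.items():
        if word in haystack:
            return industry
    return "Unknown"

def extract_skills_and_work(text):
    """Heuristic sketch: match a few known skills and 'experience'-like lines."""
    known_skills = ["python", "java", "sql", "aws", "docker", "leadership"]
    skills = [s for s in known_skills if s in text.lower()]
    work = [line.strip() for line in text.splitlines() if "experience" in line.lower()][:3]
    return skills, work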
#####################################
# Optimized Summarization
#####################################
def summarize_resume_text(resume_text):
    """Faster summarization with input truncation"""
    start_time = time.time()

    # Truncate text for summarization
    text_to_summarize = resume_text[:1024]
    base_summary = models['summarizer'](
        text_to_summarize,
        max_length=150,  # Smaller summary
        truncation=True
    )[0]['summary_text']

    # Parallel extraction with a thread pool
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Reduced number of parallel tasks
        name_future = executor.submit(extract_name, resume_text[:200])
        age_future = executor.submit(extract_age, resume_text)
        industry_future = executor.submit(extract_industry, resume_text, base_summary)

        # Get results
        name = name_future.result()
        age = age_future.result()
        industry = industry_future.result()

    skills, work = extract_skills_and_work(resume_text)  # Sequential

    # Format summary (simplified); "  \n" forces markdown line breaks in st.markdown
    formatted = "  \n".join([
        f"**Name**: {name}",
        f"**Age**: {age}",
        f"**Industry**: {industry}",
        f"**Skills**: {', '.join(skills) if skills else 'Not detected'}",
        f"**Recent Work**: {'; '.join(work) if work else 'Not detected'}",
    ]) + f"\n\n{base_summary}"
    return formatted, time.time() - start_time
# ... [Keep your scoring and feedback functions] ...
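#####################################
# Illustrative stand-ins (assumptions, NOT the original scoring logic).
# Keyword-count placeholders with the same return shapes the interface
# below expects. Delete these and keep your existing scoring and
# feedback functions if you already have them.
#####################################
def calculate_google_match_score(summary):
    """Toy keyword scorer returning (overall, per-category scores, breakdown)."""
    categories = {
        "technical": ["python", "algorithms", "system design", "sql"],
        "leadership": ["led", "managed", "mentored"],
        "impact": ["improved", "launched", "reduced", "increased"],
    }
    text = summary.lower()
    category_scores = {}
    score_breakdown = {}
    for category, words in categories.items():
        hits = [w for w in words if w in text]
        category_scores[category] = len(hits) / len(words)
        score_breakdown[category] = hits
    overall_score = sum(category_scores.values()) / len(category_scores)
    return overall_score, category_scores, score_breakdown

def generate_template_feedback(category_scores):
    """Toy feedback generator returning (markdown_text, elapsed_placeholder)."""
    lines = ["### Feedback", ""]
    for category, value in category_scores.items():
        if value >= 0.5:
            lines.append(f"- **{category.title()}**: solid coverage ({int(value * 100)}%).")
        else:
            lines.append(f"- **{category.title()}**: consider adding more evidence ({int(value * 100)}%).")
    return "\n".join(lines), 0.0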
#####################################
# Optimized Streamlit Interface
#####################################
# Helper used in the analyze flow below (defined before it runs, since the
# script executes top to bottom on every rerun).
def show_score(score):
    """Display score with appropriate formatting"""
    score_percent = int(score * 100)
    if score >= 0.85:
        st.success(f"**Match Score**: {score_percent}% 🎉")
    elif score >= 0.70:
        st.success(f"**Match Score**: {score_percent}% ✅")
    elif score >= 0.50:
        st.warning(f"**Match Score**: {score_percent}% ⚠️")
    else:
        st.error(f"**Match Score**: {score_percent}% 📉")

st.title("Google Resume Analyzer")

# Initialize session state properly
if 'progress' not in st.session_state:
    st.session_state['progress'] = 0
if 'last_update' not in st.session_state:
    st.session_state['last_update'] = time.time()

uploaded_file = st.file_uploader("Upload your resume", type=["docx", "doc", "txt"])

if uploaded_file and st.button("Analyze"):
    progress_interval = 0.1  # short pause between updates so the bar is visible
    progress_bar = st.progress(0)

    with st.spinner("Analyzing resume..."):
        # Step 1: Text extraction
        st.session_state['progress'] = 33
        progress_bar.progress(st.session_state['progress'])
        resume_text = extract_text_from_file(uploaded_file)
        if resume_text.startswith("Error"):
            st.error(resume_text)
            st.stop()
        time.sleep(progress_interval)

        # Step 2: Summarization
        st.session_state['progress'] = 66
        progress_bar.progress(st.session_state['progress'])
        summary, _ = summarize_resume_text(resume_text)
        time.sleep(progress_interval)

        # Step 3: Scoring
        st.session_state['progress'] = 100
        progress_bar.progress(st.session_state['progress'])
        overall_score, category_scores, score_breakdown = calculate_google_match_score(summary)

    # Display results
    st.subheader("Analysis Complete!")
    st.markdown(summary)
    show_score(overall_score)

    # Display feedback
    feedback, _ = generate_template_feedback(category_scores)
    st.markdown(feedback)
# ... [Keep your remaining functions] ...
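# To run locally (script name assumed; adjust to your actual file name):
#   streamlit run app.py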