import os
import re
import tempfile

import docx
import streamlit as st
import textract
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline

# Hugging Face model ids. Kept in one place so the loaders and the text shown
# to the user cannot drift apart.
SUMMARIZER_MODEL = "spursyy/mT5_multilingual_XLSum_rust"
SBERT_MODEL = "sentence-transformers/all-MiniLM-L6-v2"

# Character budget applied to the resume before it is sent to the summarizer.
MAX_INPUT_CHARS = 1024  # adjust as needed


#####################################
# Function: Extract Text from File
#####################################
def _read_resume(file_obj):
    """
    Read the text of a .doc or .docx upload.

    Returns a ``(text, error)`` tuple: on success ``error`` is None, on
    failure ``text`` is "" and ``error`` holds a human-readable message.
    """
    ext = os.path.splitext(file_obj.name)[1].lower()

    # Rewind so that a stream which was already consumed is read from the
    # start again (assumes a seekable, BytesIO-like upload object).
    if hasattr(file_obj, "seek"):
        file_obj.seek(0)

    if ext == ".docx":
        try:
            document = docx.Document(file_obj)
            return "\n".join(para.text for para in document.paragraphs), None
        except Exception as e:
            return "", f"Error processing DOCX file: {e}"

    if ext == ".doc":
        tmp_filename = None
        try:
            # textract requires a file name; save the file temporarily.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".doc") as tmp:
                tmp_filename = tmp.name
                tmp.write(file_obj.read())
            # The temp file is closed here, so textract can open it on
            # platforms that refuse to open a file twice.
            return textract.process(tmp_filename).decode("utf-8"), None
        except Exception as e:
            return "", f"Error processing DOC file: {e}"
        finally:
            # Best-effort cleanup; only attempted if the file was created.
            if tmp_filename is not None:
                try:
                    os.remove(tmp_filename)
                except OSError:
                    pass

    return "", "Unsupported file type."


def extract_text_from_file(file_obj):
    """
    Extract text from .doc and .docx files.
    Returns the extracted text or an error message if extraction fails.
    """
    text, error = _read_resume(file_obj)
    return text if error is None else error


#####################################
# Function: Summarize Resume Text using a Transformer Model
#####################################
@st.cache_resource(show_spinner=False)
def load_summarizer():
    """
    Loads the summarization pipeline using a transformer model.
    The model id is taken from SUMMARIZER_MODEL.
    """
    return pipeline("summarization", model=SUMMARIZER_MODEL)


def summarize_resume_text(resume_text):
    """
    Generates a concise summary of the resume text using the summarization model.
    Returns "" for empty or whitespace-only input without calling the model.
    Long input is trimmed to MAX_INPUT_CHARS characters first.
    """
    if not resume_text or not resume_text.strip():
        return ""

    summarizer = load_summarizer()
    # The character trim only bounds the text length, not the token count, so
    # tokenizer-level truncation is requested as well.
    summary_result = summarizer(
        resume_text[:MAX_INPUT_CHARS],
        max_length=150,
        min_length=40,
        do_sample=False,
        truncation=True,
    )
    # The summarization pipeline returns a list of summaries.
    return summary_result[0]["summary_text"]


#####################################
# Function: Compare Candidate Summary to Company Prompt
#####################################
def compute_suitability(candidate_summary, company_prompt, model):
    """
    Compute the cosine similarity between candidate summary and company prompt embeddings.
    Returns a score in the range [0, 1].
    """
    candidate_embed = model.encode(candidate_summary, convert_to_tensor=True)
    company_embed = model.encode(company_prompt, convert_to_tensor=True)
    score = float(util.cos_sim(candidate_embed, company_embed).item())
    # Cosine similarity lies in [-1, 1] (and can overshoot slightly through
    # floating-point error); clamp to honour the documented [0, 1] range.
    return max(0.0, min(1.0, score))


#####################################
# Main Resume Processing Logic
#####################################
def process_resume(file_obj):
    """
    Extracts text from the uploaded file and then generates a summary using
    a text summarization model.
    If extraction fails, the error message is returned as-is rather than
    being passed through the summarizer.
    """
    resume_text, error = _read_resume(file_obj)
    if error is not None:
        return error
    return summarize_resume_text(resume_text)


#####################################
# Load the Sentence-BERT Model (Semantic Similarity Model)
#####################################
@st.cache_resource(show_spinner=False)
def load_sbert_model():
    """Loads the Sentence-BERT model named by SBERT_MODEL."""
    return SentenceTransformer(SBERT_MODEL)


# Load Sentence-BERT model for computing semantic similarity.
sbert_model = load_sbert_model()

#####################################
# Streamlit Interface
#####################################
st.title("Resume Analyzer and Company Suitability Checker")
st.markdown(
    f"""
    Upload your resume file in **.doc** or **.docx** format. The app performs the following tasks:
    1. Extracts text from the resume.
    2. Uses a transformer-based text summarization model (**{SUMMARIZER_MODEL}**) to generate a concise candidate summary.
    3. Compares the candidate summary with a company profile (using Sentence-BERT) to produce a suitability score.
    """
)

# File uploader for resume
uploaded_file = st.file_uploader("Upload Resume", type=["doc", "docx"])

# Button to process the resume and store the summary in session state.
if st.button("Process Resume"):
    if uploaded_file is None:
        st.error("Please upload a resume file first.")
    else:
        with st.spinner("Processing resume..."):
            candidate_summary = process_resume(uploaded_file)
            st.session_state["candidate_summary"] = candidate_summary
        st.subheader("Candidate Summary")
        st.markdown(candidate_summary)

# Pre-defined company prompt for Google LLC.
default_company_prompt = (
    "Google LLC, a global leader in technology and innovation, specializes in internet services, cloud computing, "
    "artificial intelligence, and software development. As part of Alphabet Inc., Google seeks candidates with strong "
    "problem-solving skills, adaptability, and collaboration abilities. Technical roles require proficiency in programming "
    "languages such as Python, Java, C++, Go, or JavaScript, with expertise in data structures, algorithms, and system design. "
    "Additionally, skills in AI, cybersecurity, UX/UI design, and digital marketing are highly valued. Google fosters a culture "
    "of innovation, expecting candidates to demonstrate creativity, analytical thinking, and a passion for cutting-edge technology."
)

# Company prompt text area.
company_prompt = st.text_area(
    "Enter company details:",
    value=default_company_prompt,
    height=150,
)

# Button to compute the suitability score.
# Handle the "Compute Suitability Score" button: validate that a summary and a
# company description are available, then score them against each other.
if st.button("Compute Suitability Score"):
    summary_ready = "candidate_summary" in st.session_state
    stored_summary = st.session_state["candidate_summary"] if summary_ready else None

    if not summary_ready:
        # Nothing has been stored by the "Process Resume" step yet.
        st.error("Please process the resume first!")
    elif not stored_summary.strip():
        st.error("Candidate summary is empty; please check your resume file.")
    elif not company_prompt.strip():
        st.error("Please enter the company information.")
    else:
        with st.spinner("Computing suitability score..."):
            score = compute_suitability(stored_summary, company_prompt, sbert_model)
        st.success(f"Suitability Score: {score:.2f} (range 0 to 1)")