Spaces:

CR7CAD
/

ISOM5240FinalProject

Sleeping

App Files Community

CR7CAD committed on Mar 16

Commit

cc18787

verified ·

1 Parent(s): 89f5ee9

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -108

app.py CHANGED Viewed

@@ -5,6 +5,7 @@ import streamlit as st
 import docx
 import textract
 from sentence_transformers import SentenceTransformer, util
 #####################################
 # Function: Extract Text from File
@@ -44,109 +45,30 @@ def extract_text_from_file(file_obj):
     return text
 #####################################
-# Function: Extract Basic Resume Information
 #####################################
-def extract_basic_resume_info(text):
     """
-    Parse the extracted text to extract/summarize:
-    - Name
-    - Age
-    - Job Experience (capturing the block under the "experience" section)
-    - Skills
-    - Education
-    Returns a dictionary with the extracted elements.
     """
-    info = {
-        "Name": None,
-        "Age": None,
-        "Job Experience": None,
-        "Skills": None,
-        "Education": None,
-    }
-    # Extract Name (e.g., "CONG, An Dong" from the first line)
-    name_match = re.search(r"^([A-Z]+)[,\s]+([A-Z][a-z]+(?:\s+[A-Z][a-z]+)?)", text, re.MULTILINE)
-    if name_match:
-        info["Name"] = f"{name_match.group(1)} {name_match.group(2)}"
-    else:
-    # Fallback heuristic: assume a line with two or three capitalized words might be the candidate's name.
-        potential_names = re.findall(r"\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+){1,2}\b", text)
-        if potential_names:
-            info["Name"] = potential_names[0]
-    # Extract Age (e.g., "Age: 28")
-    age_match = re.search(r"[Aa]ge[:\-]\s*(\d{1,3})", text)
-    if age_match:
-        info["Age"] = age_match.group(1)
-    # Extract Job Experience using the "experience" section.
-    # Capture everything after the word "experience" until a new section or the end.
-    experience_match = re.search(
-        r"experience\s*(.*?)(?:\n\s*\n|additional information|skills|education|$)",
-        text,
-        re.IGNORECASE | re.DOTALL,
-    )
-    if experience_match:
-        job_experience = experience_match.group(1).strip()
-        info["Job Experience"] = " ".join(job_experience.split())
-    else:
-        # Fallback if not a labeled section.
-        exp_match = re.search(
-            r"(\d+)\s+(years|yrs)\s+(?:of\s+)?experience", text, re.IGNORECASE
-        )
-        if exp_match:
-            info["Job Experience"] = f"{exp_match.group(1)} {exp_match.group(2)}"
-    # Extract Skills (e.g., "Skills: Python, Java, SQL")
-    skills_match = re.search(r"(Skills|Technical Skills)[:\-]\s*(.+)", text, re.IGNORECASE)
-    if skills_match:
-        skills_str = skills_match.group(2).strip()
-        info["Skills"] = skills_str.rstrip(".")
-    # Extract Education (e.g., "Education: ...")
-    edu_match = re.search(
-        r"education\s*(.*?)(?:\n\s*\n|experience|$)", text, re.IGNORECASE | re.DOTALL
-    )
-    if edu_match:
-        education_block = edu_match.group(1).strip()
-        info["Education"] = " ".join(education_block.split())
-    else:
-        # Fallback: search for common degree identifiers.
-        edu_match = re.search(r"(Bachelor|Master|B\.Sc|M\.Sc|Ph\.D)[^\n]+", text)
-        if edu_match:
-            info["Education"] = edu_match.group(0)
-    return info
-#####################################
-# Function: Summarize Basic Info into a Paragraph
-#####################################
-def summarize_basic_info(info):
     """
-    Combine the extracted resume elements into a concise summary paragraph.
     """
-    parts = []
-    if info.get("Name"):
-        parts.append(f"Candidate {info['Name']}")
-    else:
-        parts.append("The candidate")
-    if info.get("Age"):
-        parts.append(f"aged {info['Age']}")
-    if info.get("Job Experience"):
-        parts.append(f"with job experience: {info['Job Experience']}")
-    if info.get("Skills"):
-        parts.append(f"skilled in {info['Skills']}")
-    if info.get("Education"):
-        parts.append(f"and educated in {info['Education']}")
-    summary_paragraph = ", ".join(parts) + "."
-    return summary_paragraph
 #####################################
 # Function: Compare Candidate Summary to Company Prompt
@@ -166,19 +88,24 @@ def compute_suitability(candidate_summary, company_prompt, model):
 # Main Resume Processing Logic
 #####################################
 def process_resume(file_obj):
     resume_text = extract_text_from_file(file_obj)
-    basic_info = extract_basic_resume_info(resume_text)
-    summary_paragraph = summarize_basic_info(basic_info)
-    return summary_paragraph
 #####################################
-# Load the Sentence-BERT Model
 #####################################
 @st.cache_resource(show_spinner=False)
-def load_model():
     return SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
-model = load_model()
 #####################################
 # Streamlit Interface
@@ -186,9 +113,10 @@ model = load_model()
 st.title("Resume Analyzer and Company Suitability Checker")
 st.markdown(
     """
-Upload your resume file in **.doc** or **.docx** format. The app extracts key details such as name, age, job experience, skills,
-and education, and summarizes them into a single paragraph. Then, it compares the candidate summary with a company profile
-(using a pre-defined prompt for Google LLC) to produce a suitability score.
 """
 )
@@ -206,7 +134,7 @@ if st.button("Process Resume"):
         st.subheader("Candidate Summary")
         st.markdown(candidate_summary)
-# Pre-define the company prompt for Google LLC.
 default_company_prompt = (
     "Google LLC, a global leader in technology and innovation, specializes in internet services, cloud computing, "
     "artificial intelligence, and software development. As part of Alphabet Inc., Google seeks candidates with strong "
@@ -235,5 +163,5 @@ if st.button("Compute Suitability Score"):
             st.error("Please enter the company information.")
         else:
             with st.spinner("Computing suitability score..."):
-                score = compute_suitability(candidate_summary, company_prompt, model)
             st.success(f"Suitability Score: {score:.2f} (range 0 to 1)")

 import docx
 import textract
 from sentence_transformers import SentenceTransformer, util
+from transformers import pipeline
 #####################################
 # Function: Extract Text from File
     return text
 #####################################
+# Function: Summarize Resume Text using a Transformer Model
 #####################################
+@st.cache_resource(show_spinner=False)
+def load_summarizer():
     """
+    Loads the summarization pipeline using a transformer model.
+    We use the model "ainize/bart-base-cnn" for summarization.
     """
+    return pipeline("summarization", model="ainize/bart-base-cnn")
+def summarize_resume_text(resume_text):
     """
+    Generates a concise summary of the resume text using the summarization model.
+    If the resume text is very long, we trim it to avoid hitting the model's maximum input size.
     """
+    summarizer = load_summarizer()
+    # In case the resume text is too long, we trim it.
+    max_input_length = 1024  # adjust as needed
+    if len(resume_text) > max_input_length:
+        resume_text = resume_text[:max_input_length]
+    # The summarization pipeline returns a list of summaries.
+    summary_result = summarizer(resume_text, max_length=150, min_length=40, do_sample=False)
+    candidate_summary = summary_result[0]['summary_text']
+    return candidate_summary
 #####################################
 # Function: Compare Candidate Summary to Company Prompt
 # Main Resume Processing Logic
 #####################################
 def process_resume(file_obj):
+    """
+    Extracts text from the uploaded file and then generates a summary
+    using a text summarization model.
+    """
     resume_text = extract_text_from_file(file_obj)
+    candidate_summary = summarize_resume_text(resume_text)
+    return candidate_summary
 #####################################
+# Load the Sentence-BERT Model (Semantic Similarity Model)
 #####################################
 @st.cache_resource(show_spinner=False)
+def load_sbert_model():
+    # This loads the Sentence-BERT model "all-MiniLM-L6-v2"
     return SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
+# Load Sentence-BERT model for computing semantic similarity.
+sbert_model = load_sbert_model()
 #####################################
 # Streamlit Interface
 st.title("Resume Analyzer and Company Suitability Checker")
 st.markdown(
     """
+Upload your resume file in **.doc** or **.docx** format. The app performs the following tasks:
+1. Extracts text from the resume.
+2. Uses a transformer-based text summarization model (**ainize/bart-base-cnn**) to generate a concise candidate summary.
+3. Compares the candidate summary with a company profile (using Sentence-BERT) to produce a suitability score.
 """
 )
         st.subheader("Candidate Summary")
         st.markdown(candidate_summary)
+# Pre-defined company prompt for Google LLC.
 default_company_prompt = (
     "Google LLC, a global leader in technology and innovation, specializes in internet services, cloud computing, "
     "artificial intelligence, and software development. As part of Alphabet Inc., Google seeks candidates with strong "
             st.error("Please enter the company information.")
         else:
             with st.spinner("Computing suitability score..."):
+                score = compute_suitability(candidate_summary, company_prompt, sbert_model)
             st.success(f"Suitability Score: {score:.2f} (range 0 to 1)")