Spaces:

CR7CAD
/

ISOM5240FinalProject

Sleeping

App Files Files Community

CR7CAD committed on Mar 16

Commit

cccaa8e

verified ·

1 Parent(s): 6637415

Update app.py

Browse files

Files changed (1) hide show

app.py +55 -18

app.py CHANGED Viewed

@@ -4,6 +4,7 @@ import re
 import streamlit as st
 import docx
 import textract
 #####################################
 # Function: Extract Text from File
@@ -69,7 +70,7 @@ def extract_basic_resume_info(text):
     if name_match:
         info["Name"] = name_match.group(1).strip()
     else:
-        # Heuristic: Assume the first line or a line with two or three capitalized words is the candidate's name.
         potential_names = re.findall(r"\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+){1,2}\b", text)
         if potential_names:
             info["Name"] = potential_names[0]
@@ -80,14 +81,13 @@ def extract_basic_resume_info(text):
         info["Age"] = age_match.group(1)
     # Extract Job Experience using the "experience" section.
-    # This regex captures everything after the word "experience" until the next section heading (e.g., "additional information" or "skills")
-    experience_match = re.search(r"experience\s*(.*?)(?:\n\s*\n|additional information|$)", text, re.IGNORECASE | re.DOTALL)
     if experience_match:
-        # Clean up the extracted block by removing any extra whitespace or newlines.
         job_experience = experience_match.group(1).strip()
         info["Job Experience"] = " ".join(job_experience.split())
     else:
-        # Fallback if a labeled section isn't found.
         exp_match = re.search(r"(\d+)\s+(years|yrs)\s+(?:of\s+)?experience", text, re.IGNORECASE)
         if exp_match:
             info["Job Experience"] = f"{exp_match.group(1)} {exp_match.group(2)}"
@@ -104,7 +104,7 @@ def extract_basic_resume_info(text):
         education_block = edu_match.group(1).strip()
         info["Education"] = " ".join(education_block.split())
     else:
-        # Fallback: search for lines starting with common degree words.
         edu_match = re.search(r"(Bachelor|Master|B\.Sc|M\.Sc|Ph\.D)[^\n]+", text)
         if edu_match:
             info["Education"] = edu_match.group(0)
@@ -140,27 +140,48 @@ def summarize_basic_info(info):
     summary_paragraph = ", ".join(parts) + "."
     return summary_paragraph
 #####################################
 # Main Resume Processing Logic
 #####################################
 def process_resume(file_obj):
     if file_obj is None:
-        return None, None
-    # Extract the full resume text.
     resume_text = extract_text_from_file(file_obj)
-    # Extract basic info from the text.
     basic_info = extract_basic_resume_info(resume_text)
-    # Create a summary paragraph from the basic info.
     summary_paragraph = summarize_basic_info(basic_info)
-    return resume_text, summary_paragraph
 #####################################
 # Streamlit Interface
 #####################################
-st.title("Resume Basic Information Summary")
 st.markdown("""
-Upload your resume file in **.doc** or **.docx** format. The app extracts key details such as name, age, job experience, skills,
-and education, then summarizes them into a single paragraph.
 """)
 uploaded_file = st.file_uploader("Upload Resume", type=["doc", "docx"])
@@ -170,10 +191,26 @@ if st.button("Process Resume"):
         st.error("Please upload a file first.")
     else:
         with st.spinner("Processing resume..."):
-            resume_text, summary_paragraph = process_resume(uploaded_file)
-        st.subheader("Summary Paragraph")
         st.markdown(summary_paragraph)
-        st.subheader("Full Extracted Resume Text")
-        st.text_area("", resume_text, height=300)

 import streamlit as st
 import docx
 import textract
+from sentence_transformers import SentenceTransformer, util
 #####################################
 # Function: Extract Text from File
     if name_match:
         info["Name"] = name_match.group(1).strip()
     else:
+        # Heuristic: assume a line with two or three capitalized words might be the candidate's name.
         potential_names = re.findall(r"\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+){1,2}\b", text)
         if potential_names:
             info["Name"] = potential_names[0]
         info["Age"] = age_match.group(1)
     # Extract Job Experience using the "experience" section.
+    # Capture everything after the word "experience" until a new section or the end.
+    experience_match = re.search(r"experience\s*(.*?)(?:\n\s*\n|additional information|skills|education|$)", text, re.IGNORECASE | re.DOTALL)
     if experience_match:
         job_experience = experience_match.group(1).strip()
         info["Job Experience"] = " ".join(job_experience.split())
     else:
+        # Fallback if not a labeled section.
         exp_match = re.search(r"(\d+)\s+(years|yrs)\s+(?:of\s+)?experience", text, re.IGNORECASE)
         if exp_match:
             info["Job Experience"] = f"{exp_match.group(1)} {exp_match.group(2)}"
         education_block = edu_match.group(1).strip()
         info["Education"] = " ".join(education_block.split())
     else:
+        # Fallback: search for common degree identifiers.
         edu_match = re.search(r"(Bachelor|Master|B\.Sc|M\.Sc|Ph\.D)[^\n]+", text)
         if edu_match:
             info["Education"] = edu_match.group(0)
     summary_paragraph = ", ".join(parts) + "."
     return summary_paragraph
+#####################################
+# Function: Compare Candidate Summary to Company Prompt
+#####################################
+def compute_suitability(candidate_summary, company_prompt, model):
+    """
+    Compute the cosine similarity between candidate summary and company prompt embeddings.
+    Returns the raw cosine similarity as a float; cosine similarity lies in [-1, 1] (it is not clamped to [0, 1]).
+    """
+    candidate_embed = model.encode(candidate_summary, convert_to_tensor=True)
+    company_embed = model.encode(company_prompt, convert_to_tensor=True)
+    cosine_sim = util.cos_sim(candidate_embed, company_embed)
+    score = float(cosine_sim.item())
+    return score
 #####################################
 # Main Resume Processing Logic
 #####################################
 def process_resume(file_obj):
     if file_obj is None:
+        return None
     resume_text = extract_text_from_file(file_obj)
     basic_info = extract_basic_resume_info(resume_text)
     summary_paragraph = summarize_basic_info(basic_info)
+    return summary_paragraph
+#####################################
+# Load the Sentence-BERT Model
+#####################################
+@st.cache_resource(show_spinner=False)
+def load_model():
+    return SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
+model = load_model()
 #####################################
 # Streamlit Interface
 #####################################
+st.title("Resume Analyzer and Company Suitability Checker")
 st.markdown("""
+Upload your resume file in **.doc** or **.docx** format. The app extracts key details (such as name, age, job experience, skills,
+and education) and summarizes them into a single paragraph. It then compares the candidate summary with the company profile
+(using a pre-defined prompt for Google LLC) to produce a suitability score.
 """)
 uploaded_file = st.file_uploader("Upload Resume", type=["doc", "docx"])
         st.error("Please upload a file first.")
     else:
         with st.spinner("Processing resume..."):
+            summary_paragraph = process_resume(uploaded_file)
+        st.subheader("Candidate Summary")
         st.markdown(summary_paragraph)
+        st.subheader("Company Information (Prompt)")
+        default_company_prompt = (
+            "Google LLC, a global leader in technology and innovation, specializes in internet services, cloud computing, "
+            "artificial intelligence, and software development. As part of Alphabet Inc., Google seeks candidates with strong "
+            "problem-solving skills, adaptability, and collaboration abilities. Technical roles require proficiency in programming "
+            "languages such as Python, Java, C++, Go, or JavaScript, with expertise in data structures, algorithms, and system design. "
+            "Additionally, skills in AI, cybersecurity, UX/UI design, and digital marketing are highly valued. Google fosters a culture "
+            "of innovation, expecting candidates to demonstrate creativity, analytical thinking, and a passion for cutting-edge technology."
+        )
+        company_prompt = st.text_area("Enter company details:", value=default_company_prompt, height=150)
+        if st.button("Compute Suitability Score"):
+            if not company_prompt.strip():
+                st.error("Please enter the company information.")
+            else:
+                with st.spinner("Computing suitability score..."):
+                    score = compute_suitability(summary_paragraph, company_prompt, model)
+                st.success(f"Suitability Score: {score:.2f} (range 0 to 1)")