Spaces:

CR7CAD
/

ISOM5240FinalProject

Sleeping

App Files Community

CR7CAD committed on Mar 18

Commit

2e98a93

verified ·

1 Parent(s): 86037f3

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -11

app.py CHANGED Viewed

@@ -2,6 +2,8 @@ import os
 import io
 import streamlit as st
 import docx
 from transformers import pipeline
 import numpy as np
 from scipy.spatial.distance import cosine
@@ -45,7 +47,7 @@ models = load_models()
 #####################################
 def extract_text_from_file(file_obj):
     """
-    Extract text from .docx files.
     Returns the extracted text or an error message if extraction fails.
     """
     filename = file_obj.name
@@ -75,8 +77,13 @@ def extract_text_from_file(file_obj):
             os.unlink(temp_path)
         except Exception as e:
             text = f"Error processing DOC file: {e}"
     else:
-        text = "Unsupported file type. Please upload a .docx or .doc file."
     return text
 #####################################
@@ -84,7 +91,8 @@ def extract_text_from_file(file_obj):
 #####################################
 def summarize_resume_text(resume_text, models):
     """
-    Generates a concise summary of the resume text using the selected summarization model.
     """
     start_time = time.time()
@@ -93,6 +101,9 @@ def summarize_resume_text(resume_text, models):
     # Handle long text
     max_input_length = 1024  # Model limit
     if len(resume_text) > max_input_length:
         # Process in chunks if text is too long
         chunks = [resume_text[i:i+max_input_length] for i in range(0, min(len(resume_text), 3*max_input_length), max_input_length)]
@@ -104,13 +115,31 @@ def summarize_resume_text(resume_text, models):
         candidate_summary = " ".join(summaries)
         if len(candidate_summary) > max_input_length:
-            candidate_summary = summarizer(candidate_summary[:max_input_length], max_length=150, min_length=40, do_sample=False)[0]['summary_text']
     else:
-        candidate_summary = summarizer(resume_text, max_length=150, min_length=40, do_sample=False)[0]['summary_text']
     execution_time = time.time() - start_time
-    return candidate_summary, execution_time
 #####################################
 # Function: Compare Candidate Summary to Company Prompt
@@ -145,15 +174,15 @@ def compute_suitability(candidate_summary, company_prompt, models):
 st.title("Resume Analyzer and Company Suitability Checker")
 st.markdown(
     """
-Upload your resume file in **.docx** or **.txt** format. The app performs the following tasks:
 1. Extracts text from the resume.
-2. Uses a transformer-based model to generate a concise candidate summary.
 3. Compares the candidate summary with a company profile to produce a suitability score.
 """
 )
 # File uploader
-uploaded_file = st.file_uploader("Upload your resume (.docx or .txt)", type=["docx", "txt"])
 # Company description text area
 company_prompt = st.text_area(
@@ -168,7 +197,7 @@ if uploaded_file is not None and company_prompt and st.button("Analyze Resume"):
         # Extract text from resume
         resume_text = extract_text_from_file(uploaded_file)
-        if resume_text.startswith("Error") or resume_text == "Unsupported file type. Please upload a .docx or .txt file.":
             st.error(resume_text)
         else:
             # Generate summary
@@ -176,7 +205,7 @@ if uploaded_file is not None and company_prompt and st.button("Analyze Resume"):
             # Display summary
             st.subheader("Candidate Summary")
-            st.write(summary)
             st.info(f"Summarization completed in {summarization_time:.2f} seconds")
             # Only compute similarity if company description is provided

 import io
 import streamlit as st
 import docx
+import docx2txt
+import tempfile
 from transformers import pipeline
 import numpy as np
 from scipy.spatial.distance import cosine
 #####################################
 def extract_text_from_file(file_obj):
     """
+    Extract text from .docx, .doc, and .txt files.
     Returns the extracted text or an error message if extraction fails.
     """
     filename = file_obj.name
             os.unlink(temp_path)
         except Exception as e:
             text = f"Error processing DOC file: {e}"
+    elif ext == ".txt":
+        try:
+            text = file_obj.getvalue().decode("utf-8")
+        except Exception as e:
+            text = f"Error processing TXT file: {e}"
     else:
+        text = "Unsupported file type. Please upload a .docx, .doc, or .txt file."
     return text
 #####################################
 #####################################
 def summarize_resume_text(resume_text, models):
     """
+    Generates a structured summary of the resume text including name, age,
+    expected job industry, and skills of the candidate.
     """
     start_time = time.time()
     # Handle long text
     max_input_length = 1024  # Model limit
+    # Prepend instructions to guide the model to extract structured information
+    prompt = f"Summarize this resume and include the candidate's name, age, expected job industry, and skills: {resume_text[:max_input_length]}"
     if len(resume_text) > max_input_length:
         # Process in chunks if text is too long
         chunks = [resume_text[i:i+max_input_length] for i in range(0, min(len(resume_text), 3*max_input_length), max_input_length)]
         candidate_summary = " ".join(summaries)
         if len(candidate_summary) > max_input_length:
+            candidate_summary = summarizer(f"Provide name, age, expected job industry, and skills of the candidate: {candidate_summary[:max_input_length]}",
+                                          max_length=150, min_length=40, do_sample=False)[0]['summary_text']
     else:
+        candidate_summary = summarizer(prompt, max_length=150, min_length=40, do_sample=False)[0]['summary_text']
+    # Format the summary to ensure it contains the required information
+    # If the model doesn't extract all required information, we'll add placeholders
+    formatted_summary = candidate_summary
+    # Check if the summary contains the required information and add labels if needed
+    if "name:" not in formatted_summary.lower() and "name " not in formatted_summary.lower():
+        formatted_summary = "Name: [Not explicitly mentioned in resume]\n" + formatted_summary
+    if "age:" not in formatted_summary.lower() and "age " not in formatted_summary.lower():
+        formatted_summary += "\nAge: [Not explicitly mentioned in resume]"
+    if "industry:" not in formatted_summary.lower() and "expected job" not in formatted_summary.lower():
+        formatted_summary += "\nExpected Job Industry: [Based on resume content]"
+    if "skills:" not in formatted_summary.lower() and "skills " not in formatted_summary.lower():
+        formatted_summary += "\nSkills: [Key skills extracted from resume]"
     execution_time = time.time() - start_time
+    return formatted_summary, execution_time
 #####################################
 # Function: Compare Candidate Summary to Company Prompt
 st.title("Resume Analyzer and Company Suitability Checker")
 st.markdown(
     """
+Upload your resume file in **.docx**, **.doc**, or **.txt** format. The app performs the following tasks:
 1. Extracts text from the resume.
+2. Uses a transformer-based model to generate a structured candidate summary with name, age, expected job industry, and skills.
 3. Compares the candidate summary with a company profile to produce a suitability score.
 """
 )
 # File uploader
+uploaded_file = st.file_uploader("Upload your resume (.docx, .doc, or .txt)", type=["docx", "doc", "txt"])
 # Company description text area
 company_prompt = st.text_area(
         # Extract text from resume
         resume_text = extract_text_from_file(uploaded_file)
+        if resume_text.startswith("Error") or resume_text == "Unsupported file type. Please upload a .docx, .doc, or .txt file.":
             st.error(resume_text)
         else:
             # Generate summary
             # Display summary
             st.subheader("Candidate Summary")
+            st.markdown(summary)
             st.info(f"Summarization completed in {summarization_time:.2f} seconds")
             # Only compute similarity if company description is provided