Spaces:

CR7CAD
/

ISOM5240FinalProject

Sleeping

App Files Community

CR7CAD committed on Mar 18

Commit

6713758

verified ·

1 Parent(s): 41d8604

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -17

app.py CHANGED Viewed

@@ -4,6 +4,8 @@ import streamlit as st
 import docx
 from transformers import pipeline
 import time
 # Set page title and hide sidebar
 st.set_page_config(
@@ -28,10 +30,10 @@ def load_models():
     with st.spinner("Loading AI models... This may take a minute on first run."):
         models = {}
         # Load summarization model
-        models['summarizer'] = pipeline("summarization", model="google/pegasus-xsum")
         # Load text generation model for suitability assessment
-        models['text_generator'] = pipeline("text-generation", model="gpt2")  # You can use different models
         return models
@@ -43,7 +45,7 @@ models = load_models()
 #####################################
 def extract_text_from_file(file_obj):
     """
-    Extract text from .docx files.
     Returns the extracted text or an error message if extraction fails.
     """
     filename = file_obj.name
@@ -56,13 +58,33 @@ def extract_text_from_file(file_obj):
             text = "\n".join(para.text for para in document.paragraphs if para.text.strip())
         except Exception as e:
             text = f"Error processing DOCX file: {e}"
-    elif ext == ".txt":
         try:
-            text = file_obj.getvalue().decode("utf-8")
         except Exception as e:
-            text = f"Error processing TXT file: {e}"
     else:
-        text = "Unsupported file type. Please upload a .docx or .txt file."
     return text
 #####################################
@@ -103,24 +125,24 @@ def summarize_resume_text(resume_text, models):
 #####################################
 def generate_suitability_assessment(candidate_summary, company_prompt, models):
     """
-    Generate a suitability assessment using text generation instead of similarity.
     Returns the generated assessment text and execution time.
     """
     start_time = time.time()
     text_generator = models['text_generator']
-    # Create a prompt for the text generation model
     prompt = f"""
 Resume Summary: {candidate_summary}
 Company Description: {company_prompt}
 Suitability Assessment:
-This candidate is a"""
     # Generate text
-    max_length = 80 + len(prompt.split())  # Limit output length
     generated_text = text_generator(
         prompt,
         max_length=max_length,
@@ -135,8 +157,8 @@ This candidate is a"""
     # Determine a numerical score from the text
     # This is a simplified approach - we're looking for positive and negative words
-    positive_words = ['excellent', 'perfect', 'great', 'good', 'strong', 'ideal', 'qualified']
-    negative_words = ['poor', 'weak', 'bad', 'insufficient', 'inadequate', 'not a good']
     assessment_lower = assessment.lower()
@@ -164,15 +186,15 @@ This candidate is a"""
 st.title("Resume Analyzer and Company Suitability Checker")
 st.markdown(
     """
-Upload your resume file in **.docx** or **.txt** format. The app performs the following tasks:
 1. Extracts text from the resume.
 2. Uses a transformer-based model to generate a concise candidate summary.
-3. Uses text generation to assess the candidate's suitability for the company.
 """
 )
 # File uploader
-uploaded_file = st.file_uploader("Upload your resume (.docx or .txt)", type=["docx", "txt"])
 # Company description text area
 company_prompt = st.text_area(
@@ -187,7 +209,7 @@ if uploaded_file is not None and company_prompt and st.button("Analyze Resume"):
         # Extract text from resume
         resume_text = extract_text_from_file(uploaded_file)
-        if resume_text.startswith("Error") or resume_text == "Unsupported file type. Please upload a .docx or .txt file.":
             st.error(resume_text)
         else:
             # Generate summary

 import docx
 from transformers import pipeline
 import time
+import tempfile
+import subprocess
 # Set page title and hide sidebar
 st.set_page_config(
     with st.spinner("Loading AI models... This may take a minute on first run."):
         models = {}
         # Load summarization model
+        models['summarizer'] = pipeline("summarization", model="marianna13/flan-t5-base-summarization")
         # Load text generation model for suitability assessment
+        models['text_generator'] = pipeline("text-generation", model="gpt2")
         return models
 #####################################
 def extract_text_from_file(file_obj):
     """
+    Extract text from .doc or .docx files.
     Returns the extracted text or an error message if extraction fails.
     """
     filename = file_obj.name
             text = "\n".join(para.text for para in document.paragraphs if para.text.strip())
         except Exception as e:
             text = f"Error processing DOCX file: {e}"
+    elif ext == ".doc":
         try:
+            # For .doc files, we need to save to a temp file and use an external tool
+            # This example uses antiword which needs to be installed in the environment
+            with tempfile.NamedTemporaryFile(delete=False, suffix='.doc') as temp_file:
+                temp_file.write(file_obj.getvalue())
+                temp_path = temp_file.name
+            # Try using python-docx2txt if available
+            try:
+                import docx2txt
+                text = docx2txt.process(temp_path)
+            except ImportError:
+                # Fallback to antiword if installed
+                try:
+                    text = subprocess.check_output(['antiword', temp_path]).decode('utf-8')
+                except:
+                    # If all else fails, inform the user
+                    text = "Could not process .doc file. Please convert to .docx format."
+            # Clean up temp file
+            os.unlink(temp_path)
         except Exception as e:
+            text = f"Error processing DOC file: {e}"
     else:
+        text = "Unsupported file type. Please upload a .doc or .docx file."
     return text
 #####################################
 #####################################
 def generate_suitability_assessment(candidate_summary, company_prompt, models):
     """
+    Generate a suitability assessment using text generation.
     Returns the generated assessment text and execution time.
     """
     start_time = time.time()
     text_generator = models['text_generator']
+    # Create a prompt for the text generation model that focuses on candidate alignment with company
     prompt = f"""
 Resume Summary: {candidate_summary}
 Company Description: {company_prompt}
 Suitability Assessment:
+Based on an analysis of the candidate's profile compared to the company requirements, this candidate"""
     # Generate text
+    max_length = 100 + len(prompt.split())  # Limit output length
     generated_text = text_generator(
         prompt,
         max_length=max_length,
     # Determine a numerical score from the text
     # This is a simplified approach - we're looking for positive and negative words
+    positive_words = ['excellent', 'perfect', 'great', 'good', 'strong', 'ideal', 'qualified', 'aligns well', 'matches', 'suitable']
+    negative_words = ['poor', 'weak', 'bad', 'insufficient', 'inadequate', 'not a good fit', 'misaligned', 'lacks', 'does not align']
     assessment_lower = assessment.lower()
 st.title("Resume Analyzer and Company Suitability Checker")
 st.markdown(
     """
+Upload your resume file in **.doc** or **.docx** format. The app performs the following tasks:
 1. Extracts text from the resume.
 2. Uses a transformer-based model to generate a concise candidate summary.
+3. Evaluates how well the candidate aligns with the company requirements.
 """
 )
 # File uploader
+uploaded_file = st.file_uploader("Upload your resume (.doc or .docx)", type=["doc", "docx"])
 # Company description text area
 company_prompt = st.text_area(
         # Extract text from resume
         resume_text = extract_text_from_file(uploaded_file)
+        if resume_text.startswith("Error") or resume_text == "Unsupported file type. Please upload a .doc or .docx file.":
             st.error(resume_text)
         else:
             # Generate summary