CR7CAD committed on
Commit
c9f8450
·
verified ·
1 Parent(s): adf6e8f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -34
app.py CHANGED
@@ -5,7 +5,6 @@ import docx
5
  import docx2txt
6
  import tempfile
7
  import numpy as np
8
- from scipy.spatial.distance import cosine
9
  import time
10
  import re
11
  import concurrent.futures
@@ -37,8 +36,8 @@ def load_models():
37
  # Load smaller summarization model for speed
38
  models['summarizer'] = pipeline("summarization", model="facebook/bart-large-cnn", max_length=130)
39
 
40
- # Load smaller feature extraction model for speed
41
- models['feature_extractor'] = pipeline("feature-extraction", model="roberta-base")
42
 
43
  return models
44
 
@@ -297,33 +296,64 @@ def summarize_resume_text(resume_text):
297
  return formatted_summary, execution_time
298
 
299
  #####################################
300
- # Function: Compare Candidate Summary to Company Prompt - Optimized
301
  #####################################
302
- # Fixed: Use underscore prefix for non-hashable arguments to tell Streamlit not to hash them
303
  @st.cache_data(show_spinner=False)
304
- def compute_suitability(candidate_summary, company_prompt, _feature_extractor=None):
305
  """
306
- Compute the similarity between candidate summary and company prompt.
307
- Returns a score in the range [0, 1] and execution time.
308
  """
309
  start_time = time.time()
310
 
311
- feature_extractor = _feature_extractor or models['feature_extractor']
312
 
313
- # Extract features (embeddings)
314
- candidate_features = feature_extractor(candidate_summary)
315
- company_features = feature_extractor(company_prompt)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
316
 
317
- # Convert to numpy arrays and flatten if needed
318
- candidate_vec = np.mean(np.array(candidate_features[0]), axis=0)
319
- company_vec = np.mean(np.array(company_features[0]), axis=0)
 
 
 
 
 
 
320
 
321
- # Compute cosine similarity (1 - cosine distance)
322
- similarity = 1 - cosine(candidate_vec, company_vec)
 
 
 
 
 
 
 
 
 
 
 
323
 
324
  execution_time = time.time() - start_time
325
 
326
- return similarity, execution_time
327
 
328
  #####################################
329
  # Main Streamlit Interface - with Progress Reporting
@@ -334,7 +364,7 @@ st.markdown(
334
  Upload your resume file in **.docx**, **.doc**, or **.txt** format. The app performs the following tasks:
335
  1. Extracts text from the resume.
336
  2. Uses AI to generate a structured candidate summary with name, age, expected job industry, previous work experience, and skills.
337
- 3. Compares the candidate summary with a company profile to produce a suitability score.
338
  """
339
  )
340
 
@@ -372,26 +402,28 @@ if uploaded_file is not None and company_prompt and st.button("Analyze Resume"):
372
  st.markdown(summary)
373
  st.info(f"Summary generated in {summarization_time:.2f} seconds")
374
 
375
- # Step 3: Compute similarity
376
- status_text.text("Step 3/3: Calculating compatibility with company profile...")
377
- # Pass the feature extractor with an underscore prefix to avoid hashing issues
378
- similarity_score, similarity_time = compute_suitability(summary, company_prompt, _feature_extractor=models['feature_extractor'])
 
379
  progress_bar.progress(100)
380
 
381
  # Clear status messages
382
  status_text.empty()
383
 
384
- # Display similarity score
385
  st.subheader("Suitability Assessment")
386
- st.markdown(f"**Matching Score:** {similarity_score:.2%}")
387
- st.info(f"Compatibility assessment completed in {similarity_time:.2f} seconds")
388
 
389
- # Provide interpretation
390
- if similarity_score >= 0.85:
391
- st.success("Excellent match! This candidate's profile is strongly aligned with the company requirements.")
392
- elif similarity_score >= 0.70:
393
- st.success("Good match! This candidate shows strong potential for the position.")
394
- elif similarity_score >= 0.50:
395
- st.warning("Moderate match. The candidate meets some requirements but there may be gaps.")
396
  else:
397
- st.error("Low match. The candidate's profile may not align well with the requirements.")
 
 
 
5
  import docx2txt
6
  import tempfile
7
  import numpy as np
 
8
  import time
9
  import re
10
  import concurrent.futures
 
36
  # Load smaller summarization model for speed
37
  models['summarizer'] = pipeline("summarization", model="facebook/bart-large-cnn", max_length=130)
38
 
39
+ # Load Phi-4 model for evaluation
40
+ models['evaluator'] = pipeline("text-generation", model="microsoft/Phi-4-mini-instruct", max_new_tokens=150)
41
 
42
  return models
43
 
 
296
  return formatted_summary, execution_time
297
 
298
  #####################################
299
+ # Function: Evaluate Candidate with Phi-4
300
  #####################################
 
301
  @st.cache_data(show_spinner=False)
302
+ def evaluate_suitability(candidate_summary, company_prompt, _evaluator=None):
303
  """
304
+ Use the Phi-4 model to evaluate the suitability of a candidate
305
+ based on their resume summary and the company requirements.
306
  """
307
  start_time = time.time()
308
 
309
+ evaluator = _evaluator or models['evaluator']
310
 
311
+ # Craft a prompt for the model
312
+ prompt = f"""You are an expert HR recruiter. Analyze the candidate's profile and the job requirements to provide:
313
+ 1. A suitability score from 0 to 100
314
+ 2. A brief evaluation explaining why the candidate is or isn't suitable
315
+
316
+ Candidate Profile:
317
+ {candidate_summary}
318
+
319
+ Job Requirements:
320
+ {company_prompt}
321
+
322
+ Give your assessment in this format:
323
+ Score: [0-100]
324
+ Evaluation: [Your brief evaluation]
325
+ """
326
+
327
+ # Generate the evaluation with Phi-4
328
+ result = evaluator(prompt, do_sample=True, temperature=0.3)[0]['generated_text']
329
 
330
+ # Extract the score and evaluation from the result
331
+ score_match = re.search(r'Score:\s*(\d+)', result)
332
+ if score_match:
333
+ score = int(score_match.group(1))
334
+ # Normalize to 0-1 range
335
+ normalized_score = score / 100
336
+ else:
337
+ # Default score if extraction fails
338
+ normalized_score = 0.5
339
 
340
+ # Extract the evaluation text
341
+ evaluation_match = re.search(r'Evaluation:(.*?)($|\n\n)', result, re.DOTALL)
342
+ if evaluation_match:
343
+ evaluation = evaluation_match.group(1).strip()
344
+ else:
345
+ # Extract text after "Score:" line if specific evaluation format is not found
346
+ lines = result.split('\n')
347
+ for i, line in enumerate(lines):
348
+ if 'Score:' in line and i+1 < len(lines):
349
+ evaluation = '\n'.join(lines[i+1:]).strip()
350
+ break
351
+ else:
352
+ evaluation = "The candidate's profile has been evaluated based on the job requirements."
353
 
354
  execution_time = time.time() - start_time
355
 
356
+ return normalized_score, evaluation, execution_time
357
 
358
  #####################################
359
  # Main Streamlit Interface - with Progress Reporting
 
364
  Upload your resume file in **.docx**, **.doc**, or **.txt** format. The app performs the following tasks:
365
  1. Extracts text from the resume.
366
  2. Uses AI to generate a structured candidate summary with name, age, expected job industry, previous work experience, and skills.
367
+ 3. Uses Phi-4 AI to evaluate the candidate's suitability for the company and provide feedback.
368
  """
369
  )
370
 
 
402
  st.markdown(summary)
403
  st.info(f"Summary generated in {summarization_time:.2f} seconds")
404
 
405
+ # Step 3: Evaluate candidate with Phi-4
406
+ status_text.text("Step 3/3: Evaluating candidate suitability with Phi-4...")
407
+ suitability_score, evaluation, evaluation_time = evaluate_suitability(
408
+ summary, company_prompt, _evaluator=models['evaluator']
409
+ )
410
  progress_bar.progress(100)
411
 
412
  # Clear status messages
413
  status_text.empty()
414
 
415
+ # Display suitability results
416
  st.subheader("Suitability Assessment")
417
+ st.markdown(f"**Matching Score:** {suitability_score:.0%}")
 
418
 
419
+ # Display colored evaluation box based on score
420
+ if suitability_score >= 0.85:
421
+ st.success(f"**Evaluation:** {evaluation}")
422
+ elif suitability_score >= 0.70:
423
+ st.success(f"**Evaluation:** {evaluation}")
424
+ elif suitability_score >= 0.50:
425
+ st.warning(f"**Evaluation:** {evaluation}")
426
  else:
427
+ st.error(f"**Evaluation:** {evaluation}")
428
+
429
+ st.info(f"Evaluation completed in {evaluation_time:.2f} seconds")