CR7CAD committed on
Commit
b0dca97
·
verified ·
1 Parent(s): 36a6684

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -13
app.py CHANGED
@@ -1,12 +1,11 @@
1
  import os
2
  import tempfile
3
- import re
4
  import streamlit as st
5
  import docx
6
  import textract
7
- from sentence_transformers import SentenceTransformer, util
8
  from transformers import pipeline
9
  import threading
 
10
 
11
  #####################################
12
  # Load Models - Optimized with Threading
@@ -21,13 +20,15 @@ def load_models():
21
  def load_summarizer_thread():
22
  models['summarizer'] = pipeline("summarization", model="google/pegasus-xsum", device=0 if st.session_state.get('use_gpu', False) else -1)
23
 
24
- def load_sbert_thread():
25
- models['sbert'] = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", device='cuda' if st.session_state.get('use_gpu', False) else 'cpu')
 
 
26
 
27
  # Start threads to load models in parallel
28
  threads = [
29
  threading.Thread(target=load_summarizer_thread),
30
- threading.Thread(target=load_sbert_thread)
31
  ]
32
 
33
  for thread in threads:
@@ -104,21 +105,23 @@ def summarize_resume_text(resume_text, models):
104
  return candidate_summary
105
 
106
  #####################################
107
- # Function: Compare Candidate Summary to Company Prompt - Optimized
108
  #####################################
109
  def compute_suitability(candidate_summary, company_prompt, models):
110
  """
111
- Compute the cosine similarity between candidate summary and company prompt embeddings.
112
  Returns a score in the range [0, 1].
113
  """
114
- sbert_model = models['sbert']
115
 
116
- # Encode texts in parallel (if supported by model)
117
- embeddings = sbert_model.encode([candidate_summary, company_prompt], convert_to_tensor=True)
118
- candidate_embed, company_embed = embeddings[0], embeddings[1]
 
 
119
 
120
- cosine_sim = util.cos_sim(candidate_embed, company_embed)
121
- score = float(cosine_sim.item())
122
  return score
123
 
124
  #####################################
 
1
  import os
2
  import tempfile
 
3
  import streamlit as st
4
  import docx
5
  import textract
 
6
  from transformers import pipeline
7
  import threading
8
+ import numpy as np
9
 
10
  #####################################
11
  # Load Models - Optimized with Threading
 
20
  def load_summarizer_thread():
21
  models['summarizer'] = pipeline("summarization", model="google/pegasus-xsum", device=0 if st.session_state.get('use_gpu', False) else -1)
22
 
23
def load_similarity_thread():
    """Load the sentence-similarity pipeline into the shared models dict.

    NOTE(review): "sentence-similarity" is not among the task names listed
    in the transformers pipeline documentation — confirm this loads with
    the installed transformers version (a SentenceTransformer model was
    the previous approach for this step).
    """
    device_id = 0 if st.session_state.get('use_gpu', False) else -1
    # Using sentence-similarity pipeline instead of SentenceTransformer
    models['similarity'] = pipeline(
        "sentence-similarity",
        model="sentence-transformers/all-MiniLM-L6-v2",
        device=device_id,
    )
27
 
28
  # Start threads to load models in parallel
29
  threads = [
30
  threading.Thread(target=load_summarizer_thread),
31
+ threading.Thread(target=load_similarity_thread)
32
  ]
33
 
34
  for thread in threads:
 
105
  return candidate_summary
106
 
107
  #####################################
108
+ # Function: Compare Candidate Summary to Company Prompt - Using Pipeline
109
  #####################################
110
def compute_suitability(candidate_summary, company_prompt, models):
    """
    Compute the similarity between candidate summary and company prompt
    using the preloaded similarity pipeline.

    Args:
        candidate_summary: Summary text describing the candidate.
        company_prompt: Free-text description of what the company is
            looking for.
        models: Dict of preloaded models; must contain a 'similarity'
            entry callable as ``pipeline(document, [candidates])`` that
            returns one ``{'score': ...}`` mapping per candidate.

    Returns:
        float: Similarity score clamped to the range [0, 1]. Clamping is
        needed because a raw cosine-style score can fall slightly outside
        that range (negative similarity, or >1 from floating-point error),
        which would violate the documented contract.
    """
    similarity_pipeline = models['similarity']

    # The pipeline expects a document and a list of candidates to compare to;
    # we compare against exactly one candidate text.
    result = similarity_pipeline(
        candidate_summary,
        [company_prompt]
    )

    # Extract the similarity score, coerce to a plain float (the pipeline
    # may return a numpy/tensor scalar), and clamp to the documented range.
    score = float(result[0]['score'])
    return max(0.0, min(1.0, score))
126
 
127
  #####################################