Spaces:

zayanomar5
/

omar

Sleeping

App Files Files Community

zayanomar5 committed on Jun 12, 2024

Commit

b1711fb

verified ·

1 Parent(s): 549f944

Update main.py

Browse files

Files changed (1) hide show

main.py +22 -47

main.py CHANGED Viewed

@@ -12,6 +12,7 @@ app = Flask(__name__)
 n_gpu_layers = 0
 n_batch = 1024
 llm = LlamaCpp(
     model_path="Phi-3-mini-4k-instruct-q4.gguf",  # path to GGUF file
     temperature=0.1,
@@ -27,15 +28,16 @@ model = SentenceTransformer('sentence-transformers/paraphrase-TinyBERT-L6-v2')
 file_size = os.stat('Phi-3-mini-4k-instruct-q4.gguf')
 print("model size ====> :", file_size.st_size, "bytes")
 @app.route('/cv', methods=['POST'])
 def get_skills():
     cv_body = request.json.get('cv_body')
     # Simple inference example
-    output = llm.invoke(
-        f"\n{cv_body}\nCan you list the skills mentioned in the CV?",
         max_tokens=256,  # Generate up to 256 tokens
-        stop=[""],
         echo=True,  # Whether to echo the prompt
     )
@@ -44,56 +46,29 @@ def get_skills():
 @app.get('/')
 def health():
     return jsonify({'status': 'Worked'})
-# Endpoint to compare between employee skills and job skills
 @app.route('/compare', methods=['POST'])
 def compare():
-    employee_skills = request.json.get('employee_skills')
-    jobs_skills = request.json.get('jobs_skills')
-    if not isinstance(employee_skills, list) or not all(isinstance(skill, str) for skill in employee_skills):
-        raise ValueError("employee_skills must be a list of strings")
-    if not isinstance(jobs_skills, list) or not all(isinstance(skill, str) for skill in jobs_skills):
-        raise ValueError("jobs_skills must be a list of strings")
-    job_embeddings = model.encode(jobs_skills)
-    employee_embeddings = model.encode(employee_skills)
-    similarity_scores = []
-    employee_embeddings_tensor = torch.from_numpy(employee_embeddings).unsqueeze(0)
-    for i, job_e in enumerate(job_embeddings):
-        job_e_tensor = torch.from_numpy(job_e).unsqueeze(0)
-        similarity_score = cosine_similarity(employee_embeddings_tensor, job_e_tensor, dim=1)
-        similarity_scores.append({"job": jobs_skills[i], "similarity_score": similarity_score.tolist()[0]})
-    return jsonify(similarity_scores)
-# Endpoint to compare job posts with employee skills
-@app.route('/compare_jop', methods=['POST'])
-def compare_jop():
-    employee_skills = request.json.get('post')
-    jobs_skills = request.json.get('employee_skills')
-    if not isinstance(employee_skills, list) or not all(isinstance(skill, str) for skill in employee_skills):
-        raise ValueError("employee_skills must be a list of strings")
     if not isinstance(jobs_skills, list) or not all(isinstance(skill, str) for skill in jobs_skills):
-        raise ValueError("jobs_skills must be a list of strings")
-    job_embeddings = model.encode(jobs_skills)
-    employee_embeddings = model.encode(employee_skills)
-    similarity_scores = []
-    employee_embeddings_tensor = torch.from_numpy(employee_embeddings).unsqueeze(0)
-    for i, job_e in enumerate(job_embeddings):
-        job_e_tensor = torch.from_numpy(job_e).unsqueeze(0)
-        similarity_score = cosine_similarity(employee_embeddings_tensor, job_e_tensor, dim=1)
-        similarity_scores.append({"job": jobs_skills[i], "similarity_score": similarity_score.tolist()[0]})
-    return jsonify(similarity_scores)
 if __name__ == '__main__':
     app.run()

 n_gpu_layers = 0
 n_batch = 1024
 llm = LlamaCpp(
     model_path="Phi-3-mini-4k-instruct-q4.gguf",  # path to GGUF file
     temperature=0.1,
 file_size = os.stat('Phi-3-mini-4k-instruct-q4.gguf')
 print("model size ====> :", file_size.st_size, "bytes")
 @app.route('/cv', methods=['POST'])
 def get_skills():
     cv_body = request.json.get('cv_body')
     # Simple inference example
+    output = llm(
+        f"<|user|>\n{cv_body}<|end|>\n<|assistant|>Can you list the skills mentioned in the CV?<|end|>",
         max_tokens=256,  # Generate up to 256 tokens
+        stop=["<|end|>"],
         echo=True,  # Whether to echo the prompt
     )
 @app.get('/')
 def health():
     return jsonify({'status': 'Worked'})
+# POST endpoint that compares one employee skills text (the CV) against a list of job skill texts.
+# The sentence-embedding model (not the LLM) is used to pick the job most similar to the CV.
 @app.route('/compare', methods=['POST'])
 def compare():
+    employee_skills = request.json.get('employee_skills')  # CV text
+    jobs_skills = request.json.get('jobs_skills')  # List of job skills
     if not isinstance(jobs_skills, list) or not all(isinstance(skill, str) for skill in jobs_skills):
+        raise ValueError("The jobs_skills must be a list of strings")
+    # Convert texts to embeddings arrays
+    employee_embedding = np.array([model.encode(employee_skills)])
+    job_embeddings = np.array([model.encode(skill) for skill in jobs_skills])
+    # Calculate similarity using cosine similarity
+    similarities = cosine_similarity(employee_embedding, job_embeddings)[0]
+    # Find the most similar job and its corresponding similarity score
+    max_similarity = np.max(similarities)
+    most_similar_index = np.argmax(similarities)
+    most_similar_job = jobs_skills[most_similar_index]
+    return jsonify({'job': most_similar_job, 'similarity_score': max_similarity})
 if __name__ == '__main__':
     app.run()