zayanomar5 committed on
Commit
b1711fb
·
verified ·
1 Parent(s): 549f944

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +22 -47
main.py CHANGED
@@ -12,6 +12,7 @@ app = Flask(__name__)
12
  n_gpu_layers = 0
13
  n_batch = 1024
14
 
 
15
  llm = LlamaCpp(
16
  model_path="Phi-3-mini-4k-instruct-q4.gguf", # path to GGUF file
17
  temperature=0.1,
@@ -27,15 +28,16 @@ model = SentenceTransformer('sentence-transformers/paraphrase-TinyBERT-L6-v2')
27
  file_size = os.stat('Phi-3-mini-4k-instruct-q4.gguf')
28
  print("model size ====> :", file_size.st_size, "bytes")
29
 
 
30
  @app.route('/cv', methods=['POST'])
31
  def get_skills():
32
  cv_body = request.json.get('cv_body')
33
 
34
  # Simple inference example
35
- output = llm.invoke(
36
- f"\n{cv_body}\nCan you list the skills mentioned in the CV?",
37
  max_tokens=256, # Generate up to 256 tokens
38
- stop=[""],
39
  echo=True, # Whether to echo the prompt
40
  )
41
 
@@ -44,56 +46,29 @@ def get_skills():
44
  @app.get('/')
45
  def health():
46
  return jsonify({'status': 'Worked'})
47
-
48
- # Endpoint to compare between employee skills and job skills
49
  @app.route('/compare', methods=['POST'])
50
  def compare():
51
- employee_skills = request.json.get('employee_skills')
52
- jobs_skills = request.json.get('jobs_skills')
53
-
54
- if not isinstance(employee_skills, list) or not all(isinstance(skill, str) for skill in employee_skills):
55
- raise ValueError("employee_skills must be a list of strings")
56
-
57
- if not isinstance(jobs_skills, list) or not all(isinstance(skill, str) for skill in jobs_skills):
58
- raise ValueError("jobs_skills must be a list of strings")
59
-
60
- job_embeddings = model.encode(jobs_skills)
61
- employee_embeddings = model.encode(employee_skills)
62
-
63
- similarity_scores = []
64
- employee_embeddings_tensor = torch.from_numpy(employee_embeddings).unsqueeze(0)
65
-
66
- for i, job_e in enumerate(job_embeddings):
67
- job_e_tensor = torch.from_numpy(job_e).unsqueeze(0)
68
- similarity_score = cosine_similarity(employee_embeddings_tensor, job_e_tensor, dim=1)
69
- similarity_scores.append({"job": jobs_skills[i], "similarity_score": similarity_score.tolist()[0]})
70
-
71
- return jsonify(similarity_scores)
72
-
73
- # Endpoint to compare job posts with employee skills
74
- @app.route('/compare_jop', methods=['POST'])
75
- def compare_jop():
76
- employee_skills = request.json.get('post')
77
- jobs_skills = request.json.get('employee_skills')
78
-
79
- if not isinstance(employee_skills, list) or not all(isinstance(skill, str) for skill in employee_skills):
80
- raise ValueError("employee_skills must be a list of strings")
81
-
82
  if not isinstance(jobs_skills, list) or not all(isinstance(skill, str) for skill in jobs_skills):
83
- raise ValueError("jobs_skills must be a list of strings")
84
-
85
- job_embeddings = model.encode(jobs_skills)
86
- employee_embeddings = model.encode(employee_skills)
 
87
 
88
- similarity_scores = []
89
- employee_embeddings_tensor = torch.from_numpy(employee_embeddings).unsqueeze(0)
90
 
91
- for i, job_e in enumerate(job_embeddings):
92
- job_e_tensor = torch.from_numpy(job_e).unsqueeze(0)
93
- similarity_score = cosine_similarity(employee_embeddings_tensor, job_e_tensor, dim=1)
94
- similarity_scores.append({"job": jobs_skills[i], "similarity_score": similarity_score.tolist()[0]})
95
 
96
- return jsonify(similarity_scores)
97
 
98
  if __name__ == '__main__':
99
  app.run()
 
12
  n_gpu_layers = 0
13
  n_batch = 1024
14
 
15
+
16
  llm = LlamaCpp(
17
  model_path="Phi-3-mini-4k-instruct-q4.gguf", # path to GGUF file
18
  temperature=0.1,
 
28
  file_size = os.stat('Phi-3-mini-4k-instruct-q4.gguf')
29
  print("model size ====> :", file_size.st_size, "bytes")
30
 
31
+
32
  @app.route('/cv', methods=['POST'])
33
  def get_skills():
34
  cv_body = request.json.get('cv_body')
35
 
36
  # Simple inference example
37
+ output = llm(
38
+ f"<|user|>\n{cv_body}<|end|>\n<|assistant|>Can you list the skills mentioned in the CV?<|end|>",
39
  max_tokens=256, # Generate up to 256 tokens
40
+ stop=["<|end|>"],
41
  echo=True, # Whether to echo the prompt
42
  )
43
 
 
46
  @app.get('/')
47
  def health():
48
  return jsonify({'status': 'Worked'})
49
+ # POST endpoint: compare one employee's skills text (a single string) against a list of job skill texts (many strings).
50
+ # The sentence-embedding model (not the LLM) is used to pick the job most similar to the CV.
51
  @app.route('/compare', methods=['POST'])
52
  def compare():
53
+ employee_skills = request.json.get('employee_skills') # CV text
54
+ jobs_skills = request.json.get('jobs_skills') # List of job skills
55
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  if not isinstance(jobs_skills, list) or not all(isinstance(skill, str) for skill in jobs_skills):
57
+ raise ValueError("The jobs_skills must be a list of strings")
58
+
59
+ # Convert texts to embeddings arrays
60
+ employee_embedding = np.array([model.encode(employee_skills)])
61
+ job_embeddings = np.array([model.encode(skill) for skill in jobs_skills])
62
 
63
+ # Calculate similarity using cosine similarity
64
+ similarities = cosine_similarity(employee_embedding, job_embeddings)[0]
65
 
66
+ # Find the most similar job and its corresponding similarity score
67
+ max_similarity = np.max(similarities)
68
+ most_similar_index = np.argmax(similarities)
69
+ most_similar_job = jobs_skills[most_similar_index]
70
 
71
+ return jsonify({'job': most_similar_job, 'similarity_score': max_similarity})
72
 
73
  if __name__ == '__main__':
74
  app.run()