zayanomar5 committed on
Commit
93885cf
·
verified ·
1 Parent(s): b1711fb

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +28 -29
main.py CHANGED
@@ -1,74 +1,73 @@
1
  from flask import Flask, request, jsonify
2
  from langchain_community.llms import LlamaCpp
3
  from sentence_transformers import SentenceTransformer
4
- from transformers import AutoTokenizer, AutoModel
5
-
6
- # cosine_similarity
7
  import torch
8
  from torch.nn.functional import cosine_similarity
9
  import os
 
10
  app = Flask(__name__)
11
 
12
  n_gpu_layers = 0
13
  n_batch = 1024
14
 
15
-
16
  llm = LlamaCpp(
17
- model_path="Phi-3-mini-4k-instruct-q4.gguf", # path to GGUF file
18
  temperature=0.1,
19
  n_gpu_layers=n_gpu_layers,
20
  n_batch=n_batch,
21
  verbose=True,
22
  n_ctx=4096
23
  )
24
- model0 = AutoModel.from_pretrained('sentence-transformers/paraphrase-TinyBERT-L6-v2')
25
 
 
26
  model = SentenceTransformer('sentence-transformers/paraphrase-TinyBERT-L6-v2')
27
 
28
  file_size = os.stat('Phi-3-mini-4k-instruct-q4.gguf')
29
- print("model size ====> :", file_size.st_size, "bytes")
30
 
31
 
32
  @app.route('/cv', methods=['POST'])
33
  def get_skills():
34
  cv_body = request.json.get('cv_body')
35
 
36
- # Simple inference example
37
  output = llm(
38
- f"<|user|>\n{cv_body}<|end|>\n<|assistant|>Can you list the skills mentioned in the CV?<|end|>",
39
- max_tokens=256, # Generate up to 256 tokens
40
- stop=["<|end|>"],
41
- echo=True, # Whether to echo the prompt
42
  )
43
 
44
  return jsonify({'skills': output})
45
 
 
46
  @app.get('/')
47
  def health():
48
  return jsonify({'status': 'Worked'})
49
- # POST endpoint: compare one employee's skills text against a list of job skill texts
50
- # and report the job whose skills are most similar to the CV.
51
  @app.route('/compare', methods=['POST'])
52
  def compare():
53
- employee_skills = request.json.get('employee_skills') # CV text
54
- jobs_skills = request.json.get('jobs_skills') # List of job skills
55
-
 
56
  if not isinstance(jobs_skills, list) or not all(isinstance(skill, str) for skill in jobs_skills):
57
- raise ValueError("The jobs_skills must be a list of strings")
58
-
59
- # Convert texts to embeddings arrays
60
- employee_embedding = np.array([model.encode(employee_skills)])
61
- job_embeddings = np.array([model.encode(skill) for skill in jobs_skills])
62
 
63
- # Calculate similarity using cosine similarity
64
- similarities = cosine_similarity(employee_embedding, job_embeddings)[0]
 
65
 
66
- # Find the most similar job and its corresponding similarity score
67
- max_similarity = np.max(similarities)
68
- most_similar_index = np.argmax(similarities)
69
- most_similar_job = jobs_skills[most_similar_index]
70
 
71
- return jsonify({'job': most_similar_job, 'similarity_score': max_similarity})
 
 
 
72
 
 
73
  if __name__ == '__main__':
74
  app.run()
 
1
  from flask import Flask, request, jsonify
2
  from langchain_community.llms import LlamaCpp
3
  from sentence_transformers import SentenceTransformer
4
+ from transformers import AutoModel
 
 
5
  import torch
6
  from torch.nn.functional import cosine_similarity
7
  import os
8
+
9
  app = Flask(__name__)
10
 
11
  n_gpu_layers = 0
12
  n_batch = 1024
13
 
 
14
  llm = LlamaCpp(
15
+ model_path="Phi-3-mini-4k-instruct-q4.gguf",
16
  temperature=0.1,
17
  n_gpu_layers=n_gpu_layers,
18
  n_batch=n_batch,
19
  verbose=True,
20
  n_ctx=4096
21
  )
 
22
 
23
+ # Initializing sentence transformer model
24
  model = SentenceTransformer('sentence-transformers/paraphrase-TinyBERT-L6-v2')
25
 
26
  file_size = os.stat('Phi-3-mini-4k-instruct-q4.gguf')
27
+ print("Model size: ", file_size.st_size, "bytes")
28
 
29
 
30
  @app.route('/cv', methods=['POST'])
31
  def get_skills():
32
  cv_body = request.json.get('cv_body')
33
 
 
34
  output = llm(
35
+ f"\n{cv_body}\nCan you list the skills mentioned in the CV?",
36
+ max_tokens=256,
37
+ stop=[""],
38
+ echo=True,
39
  )
40
 
41
  return jsonify({'skills': output})
42
 
43
+
44
  @app.get('/')
45
  def health():
46
  return jsonify({'status': 'Worked'})
47
+
48
+
49
  @app.route('/compare', methods=['POST'])
50
  def compare():
51
+ employee_skills = request.json.get('employee_skills')
52
+ jobs_skills = request.json.get('jobs_skills')
53
+
54
+ # Validation
55
  if not isinstance(jobs_skills, list) or not all(isinstance(skill, str) for skill in jobs_skills):
56
+ raise ValueError("jobs_skills must be a list of strings")
 
 
 
 
57
 
58
+ # Encoding skills into embeddings
59
+ job_embeddings = model.encode(jobs_skills)
60
+ employee_embeddings = model.encode(employee_skills)
61
 
62
+ # Computing cosine similarity between employee skills and each job
63
+ similarity_scores = []
64
+ employee_embeddings_tensor = torch.from_numpy(employee_embeddings).unsqueeze(0)
 
65
 
66
+ for i, job_e in enumerate(job_embeddings):
67
+ job_e_tensor = torch.from_numpy(job_e).unsqueeze(0)
68
+ similarity_score = cosine_similarity(employee_embeddings_tensor, job_e_tensor, dim=1)
69
+ similarity_scores.append({"job": jobs_skills[i], "similarity_score": similarity_score.item()})
70
 
71
+ return jsonify(similarity_scores)
72
  if __name__ == '__main__':
73
  app.run()