Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
@@ -12,6 +12,7 @@ app = Flask(__name__)
|
|
12 |
n_gpu_layers = 0
|
13 |
n_batch = 1024
|
14 |
|
|
|
15 |
llm = LlamaCpp(
|
16 |
model_path="Phi-3-mini-4k-instruct-q4.gguf", # path to GGUF file
|
17 |
temperature=0.1,
|
@@ -27,15 +28,16 @@ model = SentenceTransformer('sentence-transformers/paraphrase-TinyBERT-L6-v2')
|
|
27 |
file_size = os.stat('Phi-3-mini-4k-instruct-q4.gguf')
|
28 |
print("model size ====> :", file_size.st_size, "bytes")
|
29 |
|
|
|
30 |
@app.route('/cv', methods=['POST'])
|
31 |
def get_skills():
|
32 |
cv_body = request.json.get('cv_body')
|
33 |
|
34 |
# Simple inference example
|
35 |
-
output = llm
|
36 |
-
f"
|
37 |
max_tokens=256, # Generate up to 256 tokens
|
38 |
-
stop=[""],
|
39 |
echo=True, # Whether to echo the prompt
|
40 |
)
|
41 |
|
@@ -44,56 +46,29 @@ def get_skills():
|
|
44 |
@app.get('/')
|
45 |
def health():
|
46 |
return jsonify({'status': 'Worked'})
|
47 |
-
|
48 |
-
#
|
49 |
@app.route('/compare', methods=['POST'])
|
50 |
def compare():
|
51 |
-
employee_skills = request.json.get('employee_skills')
|
52 |
-
jobs_skills = request.json.get('jobs_skills')
|
53 |
-
|
54 |
-
if not isinstance(employee_skills, list) or not all(isinstance(skill, str) for skill in employee_skills):
|
55 |
-
raise ValueError("employee_skills must be a list of strings")
|
56 |
-
|
57 |
-
if not isinstance(jobs_skills, list) or not all(isinstance(skill, str) for skill in jobs_skills):
|
58 |
-
raise ValueError("jobs_skills must be a list of strings")
|
59 |
-
|
60 |
-
job_embeddings = model.encode(jobs_skills)
|
61 |
-
employee_embeddings = model.encode(employee_skills)
|
62 |
-
|
63 |
-
similarity_scores = []
|
64 |
-
employee_embeddings_tensor = torch.from_numpy(employee_embeddings).unsqueeze(0)
|
65 |
-
|
66 |
-
for i, job_e in enumerate(job_embeddings):
|
67 |
-
job_e_tensor = torch.from_numpy(job_e).unsqueeze(0)
|
68 |
-
similarity_score = cosine_similarity(employee_embeddings_tensor, job_e_tensor, dim=1)
|
69 |
-
similarity_scores.append({"job": jobs_skills[i], "similarity_score": similarity_score.tolist()[0]})
|
70 |
-
|
71 |
-
return jsonify(similarity_scores)
|
72 |
-
|
73 |
-
# Endpoint to compare job posts with employee skills
|
74 |
-
@app.route('/compare_jop', methods=['POST'])
|
75 |
-
def compare_jop():
|
76 |
-
employee_skills = request.json.get('post')
|
77 |
-
jobs_skills = request.json.get('employee_skills')
|
78 |
-
|
79 |
-
if not isinstance(employee_skills, list) or not all(isinstance(skill, str) for skill in employee_skills):
|
80 |
-
raise ValueError("employee_skills must be a list of strings")
|
81 |
-
|
82 |
if not isinstance(jobs_skills, list) or not all(isinstance(skill, str) for skill in jobs_skills):
|
83 |
-
raise ValueError("jobs_skills must be a list of strings")
|
84 |
-
|
85 |
-
|
86 |
-
|
|
|
87 |
|
88 |
-
|
89 |
-
|
90 |
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
|
96 |
-
return jsonify(
|
97 |
|
98 |
if __name__ == '__main__':
|
99 |
app.run()
|
|
|
12 |
n_gpu_layers = 0
|
13 |
n_batch = 1024
|
14 |
|
15 |
+
|
16 |
llm = LlamaCpp(
|
17 |
model_path="Phi-3-mini-4k-instruct-q4.gguf", # path to GGUF file
|
18 |
temperature=0.1,
|
|
|
28 |
file_size = os.stat('Phi-3-mini-4k-instruct-q4.gguf')
|
29 |
print("model size ====> :", file_size.st_size, "bytes")
|
30 |
|
31 |
+
|
32 |
@app.route('/cv', methods=['POST'])
|
33 |
def get_skills():
|
34 |
cv_body = request.json.get('cv_body')
|
35 |
|
36 |
# Simple inference example
|
37 |
+
output = llm(
|
38 |
+
f"<|user|>\n{cv_body}<|end|>\n<|assistant|>Can you list the skills mentioned in the CV?<|end|>",
|
39 |
max_tokens=256, # Generate up to 256 tokens
|
40 |
+
stop=["<|end|>"],
|
41 |
echo=True, # Whether to echo the prompt
|
42 |
)
|
43 |
|
|
|
46 |
@app.get('/')
def health():
    """Liveness probe: reply with a fixed JSON status payload."""
    payload = {'status': 'Worked'}
    return jsonify(payload)
|
49 |
+
# The POST endpoint below compares a single employee text (the CV) against a list of job texts.
|
50 |
+
# The endpoint reports the job most similar to the CV (using embedding similarity, not the LLM).
|
51 |
@app.route('/compare', methods=['POST'])
def compare():
    """Return the job text that is most similar to the employee's CV text.

    Expects a JSON request body with:
      * ``employee_skills`` -- a single string (the CV / skills text).
      * ``jobs_skills`` -- a non-empty list of strings, one entry per job.

    Both sides are embedded with the module-level ``model`` and ranked by
    cosine similarity.

    Returns:
        A JSON object ``{'job': <best matching entry of jobs_skills>,
        'similarity_score': <float>}``.

    Raises:
        ValueError: if ``employee_skills`` is not a string, or if
            ``jobs_skills`` is not a non-empty list of strings.
    """
    # Local import keeps this block self-contained even if the module does
    # not import NumPy under the name ``np``.
    import numpy as np

    employee_skills = request.json.get('employee_skills')  # CV text
    jobs_skills = request.json.get('jobs_skills')  # List of job skills

    if not isinstance(employee_skills, str):
        raise ValueError("The employee_skills must be a string")

    if not isinstance(jobs_skills, list) or not all(isinstance(skill, str) for skill in jobs_skills):
        raise ValueError("The jobs_skills must be a list of strings")

    # An empty list passes the check above (``all([])`` is True) but has no
    # maximum; fail early with a clear message instead of deep inside NumPy.
    if not jobs_skills:
        raise ValueError("The jobs_skills must not be empty")

    # Convert texts to embedding arrays. The jobs are encoded in one batch
    # instead of one ``encode`` call per entry.
    employee_embedding = np.asarray(model.encode(employee_skills), dtype=np.float64).ravel()
    job_embeddings = np.asarray(model.encode(jobs_skills), dtype=np.float64)
    job_embeddings = job_embeddings.reshape(len(jobs_skills), -1)

    # Cosine similarity of the CV against every job, computed directly with
    # NumPy so the result does not depend on which ``cosine_similarity``
    # (torch-style or scikit-learn-style signature) is bound at module level.
    norms = np.linalg.norm(job_embeddings, axis=1) * np.linalg.norm(employee_embedding)
    norms[norms == 0.0] = 1.0  # an all-zero embedding scores 0 instead of NaN
    similarities = (job_embeddings @ employee_embedding) / norms

    # Find the most similar job and its corresponding similarity score.
    most_similar_index = int(np.argmax(similarities))
    most_similar_job = jobs_skills[most_similar_index]

    # NumPy scalars are not JSON serialisable; convert to a plain float.
    max_similarity = float(similarities[most_similar_index])

    return jsonify({'job': most_similar_job, 'similarity_score': max_similarity})
|
72 |
|
73 |
if __name__ == '__main__':
|
74 |
app.run()
|