Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
@@ -1,74 +1,73 @@
|
|
1 |
from flask import Flask, request, jsonify
|
2 |
from langchain_community.llms import LlamaCpp
|
3 |
from sentence_transformers import SentenceTransformer
|
4 |
-
from transformers import
|
5 |
-
|
6 |
-
# cosine_similarity
|
7 |
import torch
|
8 |
from torch.nn.functional import cosine_similarity
|
9 |
import os
|
|
|
10 |
app = Flask(__name__)
|
11 |
|
12 |
n_gpu_layers = 0
|
13 |
n_batch = 1024
|
14 |
|
15 |
-
|
16 |
llm = LlamaCpp(
|
17 |
-
model_path="Phi-3-mini-4k-instruct-q4.gguf",
|
18 |
temperature=0.1,
|
19 |
n_gpu_layers=n_gpu_layers,
|
20 |
n_batch=n_batch,
|
21 |
verbose=True,
|
22 |
n_ctx=4096
|
23 |
)
|
24 |
-
model0 = AutoModel.from_pretrained('sentence-transformers/paraphrase-TinyBERT-L6-v2')
|
25 |
|
|
|
26 |
model = SentenceTransformer('sentence-transformers/paraphrase-TinyBERT-L6-v2')
|
27 |
|
28 |
file_size = os.stat('Phi-3-mini-4k-instruct-q4.gguf')
|
29 |
-
print("
|
30 |
|
31 |
|
32 |
@app.route('/cv', methods=['POST'])
|
33 |
def get_skills():
|
34 |
cv_body = request.json.get('cv_body')
|
35 |
|
36 |
-
# Simple inference example
|
37 |
output = llm(
|
38 |
-
f"
|
39 |
-
max_tokens=256,
|
40 |
-
stop=["
|
41 |
-
echo=True,
|
42 |
)
|
43 |
|
44 |
return jsonify({'skills': output})
|
45 |
|
|
|
46 |
@app.get('/')
|
47 |
def health():
|
48 |
return jsonify({'status': 'Worked'})
|
49 |
-
|
50 |
-
|
51 |
@app.route('/compare', methods=['POST'])
|
52 |
def compare():
|
53 |
-
employee_skills = request.json.get('employee_skills')
|
54 |
-
jobs_skills = request.json.get('jobs_skills')
|
55 |
-
|
|
|
56 |
if not isinstance(jobs_skills, list) or not all(isinstance(skill, str) for skill in jobs_skills):
|
57 |
-
raise ValueError("
|
58 |
-
|
59 |
-
# Convert texts to embeddings arrays
|
60 |
-
employee_embedding = np.array([model.encode(employee_skills)])
|
61 |
-
job_embeddings = np.array([model.encode(skill) for skill in jobs_skills])
|
62 |
|
63 |
-
#
|
64 |
-
|
|
|
65 |
|
66 |
-
#
|
67 |
-
|
68 |
-
|
69 |
-
most_similar_job = jobs_skills[most_similar_index]
|
70 |
|
71 |
-
|
|
|
|
|
|
|
72 |
|
|
|
73 |
if __name__ == '__main__':
|
74 |
app.run()
|
|
|
1 |
from flask import Flask, request, jsonify
|
2 |
from langchain_community.llms import LlamaCpp
|
3 |
from sentence_transformers import SentenceTransformer
|
4 |
+
from transformers import AutoModel
|
|
|
|
|
5 |
import torch
|
6 |
from torch.nn.functional import cosine_similarity
|
7 |
import os
|
8 |
+
|
9 |
app = Flask(__name__)
|
10 |
|
11 |
n_gpu_layers = 0
|
12 |
n_batch = 1024
|
13 |
|
|
|
14 |
llm = LlamaCpp(
|
15 |
+
model_path="Phi-3-mini-4k-instruct-q4.gguf",
|
16 |
temperature=0.1,
|
17 |
n_gpu_layers=n_gpu_layers,
|
18 |
n_batch=n_batch,
|
19 |
verbose=True,
|
20 |
n_ctx=4096
|
21 |
)
|
|
|
22 |
|
23 |
+
# Initializing sentence transformer model
|
24 |
model = SentenceTransformer('sentence-transformers/paraphrase-TinyBERT-L6-v2')
|
25 |
|
26 |
file_size = os.stat('Phi-3-mini-4k-instruct-q4.gguf')
|
27 |
+
print("Model size: ", file_size.st_size, "bytes")
|
28 |
|
29 |
|
30 |
@app.route('/cv', methods=['POST'])
|
31 |
def get_skills():
|
32 |
cv_body = request.json.get('cv_body')
|
33 |
|
|
|
34 |
output = llm(
|
35 |
+
f"\n{cv_body}\nCan you list the skills mentioned in the CV?",
|
36 |
+
max_tokens=256,
|
37 |
+
stop=[""],
|
38 |
+
echo=True,
|
39 |
)
|
40 |
|
41 |
return jsonify({'skills': output})
|
42 |
|
43 |
+
|
44 |
@app.get('/')
|
45 |
def health():
|
46 |
return jsonify({'status': 'Worked'})
|
47 |
+
|
48 |
+
|
49 |
@app.route('/compare', methods=['POST'])
|
50 |
def compare():
|
51 |
+
employee_skills = request.json.get('employee_skills')
|
52 |
+
jobs_skills = request.json.get('jobs_skills')
|
53 |
+
|
54 |
+
# Validation
|
55 |
if not isinstance(jobs_skills, list) or not all(isinstance(skill, str) for skill in jobs_skills):
|
56 |
+
raise ValueError("jobs_skills must be a list of strings")
|
|
|
|
|
|
|
|
|
57 |
|
58 |
+
# Encoding skills into embeddings
|
59 |
+
job_embeddings = model.encode(jobs_skills)
|
60 |
+
employee_embeddings = model.encode(employee_skills)
|
61 |
|
62 |
+
# Computing cosine similarity between employee skills and each job
|
63 |
+
similarity_scores = []
|
64 |
+
employee_embeddings_tensor = torch.from_numpy(employee_embeddings).unsqueeze(0)
|
|
|
65 |
|
66 |
+
for i, job_e in enumerate(job_embeddings):
|
67 |
+
job_e_tensor = torch.from_numpy(job_e).unsqueeze(0)
|
68 |
+
similarity_score = cosine_similarity(employee_embeddings_tensor, job_e_tensor, dim=1)
|
69 |
+
similarity_scores.append({"job": jobs_skills[i], "similarity_score": similarity_score.item()})
|
70 |
|
71 |
+
return jsonify(similarity_scores)
|
72 |
if __name__ == '__main__':
|
73 |
app.run()
|