Update main.py
main.py CHANGED
@@ -2,42 +2,51 @@ from flask import Flask, request, jsonify
 from langchain_community.llms import LlamaCpp
 from sentence_transformers import SentenceTransformer
 from transformers import AutoTokenizer, AutoModel
+from huggingface_hub import hf_hub_download, HfApi
 
 # cosine_similarity
 import torch
 from torch.nn.functional import cosine_similarity
-import os
+import os
+
 app = Flask(__name__)
 
 n_gpu_layers = 0
 n_batch = 1024
 
+# Download the model, with error handling
+try:
+    model_path = hf_hub_download(repo_id="repo_name", filename="model_file_name", force_download=True)
+except Exception as e:
+    print(f"Error downloading the model: {e}")
+    model_path = None
 
-
-
-
-
-
-
-
-
-
+# Make sure the model was downloaded successfully
+if model_path:
+    llm = LlamaCpp(
+        model_path=model_path,  # path to the GGUF file
+        temperature=0.1,
+        n_gpu_layers=n_gpu_layers,
+        n_batch=n_batch,
+        verbose=True,
+        n_ctx=4096
+    )
 
+model0 = AutoModel.from_pretrained('sentence-transformers/paraphrase-TinyBERT-L6-v2')
 model = SentenceTransformer('sentence-transformers/paraphrase-TinyBERT-L6-v2')
 
 file_size = os.stat('Phi-3-mini-4k-instruct-q4.gguf')
 print("model size ====> :", file_size.st_size, "bytes")
 
-
 @app.route('/cv', methods=['POST'])
 def get_skills():
     cv_body = request.json.get('cv_body')
 
     # Simple inference example
     output = llm(
-        f"
+        f"\n{cv_body}\nCan you list the skills mentioned in the CV?",
         max_tokens=256,  # Generate up to 256 tokens
-        stop=["
+        stop=[""],
         echo=True,  # Whether to echo the prompt
     )
 
@@ -47,26 +56,32 @@ def get_skills():
 def health():
     return jsonify({'status': 'Worked'})
 
-# we will make a POST request here to compare lists of skills: the employee side is a single text, the jobs side has many texts
-# the llm will say the most similar job to the cv
 @app.route('/compare', methods=['POST'])
 def compare():
-    employee_skills = request.json.get('employee_skills')# string
-
-
-    #example: jobs_skills = ["<|assistant|> Sure! Here are the skills required for the Software Engineer position:\n\n1. Proficiency in Python programming language\n2. Experience with Java development\n3. Knowledge of web development technologies (e.g., HTML, CSS, JavaScript)\n4. Familiarity with machine learning algorithms and frameworks (e.g., TensorFlow, PyTorch)\n5. Strong problem-solving skills\n6. Effective communication abilities\n7. Agile methodology understanding\n8. Version control system expertise (e.g., Git)\n9. Software architecture design experience\n10. Testing methodologies knowledge (unit tests, integration tests, end-to-end tests)\n11. Continuous learning mindset for staying updated on technological advancements\n12. Collaboration skills for effective teamwork\n\nThese skills are essential for the Software Engineer role to contribute to the development of innovative software solutions, optimize performance, ensure code quality, and foster a collaborative work environment.", "<|assistant|> Certainly! Here are the skills required for the Data Scientist position:\n\n1. Proficiency in Python programming language\n2. Experience with data analysis and visualization tools (e.g., Pandas, Matplotlib)\n3. Knowledge of machine learning algorithms and statistical modeling techniques\n4. Strong problem-solving and analytical skills\n5. Effective communication abilities\n6. Agile methodology understanding\n7. Version control system expertise (e.g., Git)\n8. Data preprocessing and cleaning experience\n9. Model evaluation and optimization skills\n10. Continuous learning mindset for staying updated on data science advancements\n11. Collaboration skills for effective teamwork\n\nThese skills are essential for the Data Scientist role to analyze data, develop predictive models, optimize algorithms, and collaborate with cross-functional teams."]
+    employee_skills = request.json.get('employee_skills') # string
+    jobs_skills = request.json.get('jobs_skills') # list of strings
+
     if not isinstance(jobs_skills, list) or not all(isinstance(skill, str) for skill in jobs_skills):
         raise ValueError("jobs_skills must be a list of strings")
+
     job_embeddings = model.encode(jobs_skills)
     employee_embeddings = model.encode(employee_skills)
     sim = []
     employee_embeddings_tensor = torch.from_numpy(employee_embeddings).unsqueeze(0)
     for job_e in job_embeddings:
         job_e_tensor = torch.from_numpy(job_e).unsqueeze(0)
-        sim.append(cosine_similarity(employee_embeddings_tensor, job_e_tensor,dim=1))
+        sim.append(cosine_similarity(employee_embeddings_tensor, job_e_tensor, dim=1))
+
     max_sim = max(sim)
     index = sim.index(max_sim)
     return jsonify({'job': jobs_skills[index]})
 
+@app.route('/models', methods=['GET'])
+def list_models():
+    hf_api = HfApi()
+    models = hf_api.list_models()
+    return jsonify({'models': models})
+
 if __name__ == '__main__':
-    app.run()
+    app.run()
+
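For reference, the ranking that compare() performs (encode the employee text and each job text, score them with cosine similarity, return the closest job) can also be expressed with the util helpers bundled with sentence-transformers. The snippet below is only an illustrative sketch of that same idea, not code from this commit; most_similar_job is a hypothetical helper name.

from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer('sentence-transformers/paraphrase-TinyBERT-L6-v2')

def most_similar_job(employee_skills, jobs_skills):
    # Encode straight to torch tensors so no manual numpy -> torch conversion is needed.
    employee_emb = model.encode(employee_skills, convert_to_tensor=True)
    job_embs = model.encode(jobs_skills, convert_to_tensor=True)
    # util.cos_sim returns a (1, len(jobs_skills)) matrix of cosine similarities.
    scores = util.cos_sim(employee_emb, job_embs)
    best_index = int(scores.argmax())
    return jobs_skills[best_index]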
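To exercise the updated endpoints, a small client script along these lines can be used. It is a minimal sketch, not part of the commit: it assumes the app is running on the default Flask development address (http://127.0.0.1:5000), and the CV text and skill strings are made-up examples.

import requests

BASE_URL = "http://127.0.0.1:5000"  # assumed default Flask dev server address

# POST a CV body to /cv so the LLM can list the skills it finds.
cv_resp = requests.post(
    f"{BASE_URL}/cv",
    json={"cv_body": "Three years as a Python backend developer using Flask and PostgreSQL."},
)
print(cv_resp.text)  # get_skills()'s return statement is outside this diff, so just print the raw body

# POST one employee skill string plus several job skill strings to /compare;
# the response holds the jobs_skills entry most similar to the employee's skills.
compare_resp = requests.post(
    f"{BASE_URL}/compare",
    json={
        "employee_skills": "Python, Flask, REST APIs, SQL",
        "jobs_skills": [
            "Python, Django, REST APIs, PostgreSQL",
            "Java, Spring Boot, microservices",
        ],
    },
)
print(compare_resp.json())  # {'job': <the most similar jobs_skills entry>}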