# Flask service for CV skill extraction and employee/job skill matching.
# (Deployed as a Hugging Face Space; non-code viewer residue removed from this header.)
from flask import Flask, request, jsonify
from langchain_community.llms import LlamaCpp
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModel
from sklearn.metrics.pairwise import cosine_similarity
from flask import Flask, request, jsonify
import torch
import os
# Flask app exposing CV-skill extraction (/cv) and skill matching (/compare).
app = Flask(__name__)
n_gpu_layers = 0  # CPU-only inference: no transformer layers offloaded to a GPU
n_batch = 1024  # tokens processed per llama.cpp evaluation batch
# Local Phi-3-mini-4k instruct model (4-bit quantized GGUF) served via llama.cpp.
llm = LlamaCpp(
    model_path="Phi-3-mini-4k-instruct-q4.gguf",  # path to GGUF file
    temperature=0.1,  # low temperature: near-deterministic generation
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    verbose=True,
    n_ctx=4096  # context window sized to the model's 4k limit
)
# NOTE(review): model0 is never referenced below — presumably a leftover load that
# wastes memory at startup; confirm nothing external imports it before removing.
model0 = AutoModel.from_pretrained('sentence-transformers/paraphrase-TinyBERT-L6-v2')
# Sentence embedder used by /compare for cosine-similarity skill matching.
model = SentenceTransformer('sentence-transformers/paraphrase-TinyBERT-L6-v2')
# Log the GGUF file size at startup as a sanity check that the model file exists.
file_size = os.stat('Phi-3-mini-4k-instruct-q4.gguf')
print("model size ====> :", file_size.st_size, "bytes")
@app.route('/cv', methods=['POST'])
def get_skills():
    """Extract the skills mentioned in a CV.

    Expects a JSON body ``{"cv_body": "<cv text>"}``.

    Returns:
        200 with ``{"skills": <llm output>}`` on success,
        400 with an error message when ``cv_body`` is missing or empty.
    """
    cv_body = request.json.get('cv_body')
    # Guard against a missing/empty field: without this, a request lacking
    # 'cv_body' would interpolate the string "None" into the LLM prompt.
    if not isinstance(cv_body, str) or not cv_body.strip():
        return jsonify({"error": "cv_body must be a non-empty string"}), 400
    # NOTE(review): the instruction sits after the <|assistant|> tag rather than
    # in the user turn — looks unusual for the Phi-3 chat template; kept as-is
    # to preserve behavior, but worth confirming against the model card.
    output = llm(
        f"<|user|>\n{cv_body}<|end|>\n<|assistant|>Can you list the skills mentioned in the CV?<|end|>",
        max_tokens=256,  # generate up to 256 tokens
        stop=["<|end|>"],
        echo=True,  # echo the prompt back in the output
    )
    return jsonify({'skills': output})
@app.get('/')
def health():
    """Health-check endpoint: reports that the service is up."""
    payload = {'status': 'Worked'}
    return jsonify(payload)
# POST endpoint comparing two skill lists: employees_skills holds one text per
# employee, while jobs_skills holds one text per job.
# For each employee, the embedding model identifies the most similar job.
@app.route('/compare', methods=['POST'])
def compare():
    """Match each employee's skill text to the most similar job skill text.

    Expects a JSON body with ``jobs_skills`` (list of strings, one per job)
    and ``employees_skills`` (list of strings, one per employee).

    Returns:
        200 with ``{"employee_<i>": {"job": <best match>, "similarities":
        {<job>: <cosine score>, ...}}, ...}``, or 400 on invalid input.
    """
    data = request.json
    jobs_skills = data.get('jobs_skills')
    employees_skills = data.get('employees_skills')
    # Validate input types.
    if not isinstance(jobs_skills, list) or not all(isinstance(skill, str) for skill in jobs_skills):
        return jsonify({"error": "jobs_skills must be a list of strings"}), 400
    if not isinstance(employees_skills, list) or not all(isinstance(skills, str) for skills in employees_skills):
        return jsonify({"error": "employees_skills must be a list of strings"}), 400
    # Empty lists previously passed validation and crashed on argmax of an
    # empty similarity row — reject them up front.
    if not jobs_skills or not employees_skills:
        return jsonify({"error": "jobs_skills and employees_skills must be non-empty"}), 400
    # Batch-encode each list in a single call (one model pass instead of one
    # per string); cosine_similarity consumes the numpy arrays directly, so
    # the former numpy -> torch tensor round-trip is unnecessary.
    job_embeddings = model.encode(jobs_skills)
    employee_embeddings = model.encode(employees_skills)
    # Rows = employees, columns = jobs.
    similarity_matrix = cosine_similarity(employee_embeddings, job_embeddings)
    all_similarities = {}
    for idx, similarities in enumerate(similarity_matrix):
        best_index = similarities.argmax()
        all_similarities[f'employee_{idx+1}'] = {
            'job': jobs_skills[best_index],
            # Cast numpy floats to Python floats for JSON serialization.
            'similarities': {job: float(score) for job, score in zip(jobs_skills, similarities)},
        }
    return jsonify(all_similarities)
if __name__ == '__main__':
    # Start Flask's built-in development server (use a WSGI server in production).
    # Fix: removed a stray trailing "|" (scrape residue) that made this line a syntax error.
    app.run()