# Scraped from Hugging Face Spaces: zayanomar5/omarz (status: Sleeping).
# File size: 2,714 Bytes
# Commit hashes shown alongside the file on the page: db7aed9, bafd189,
# 13685be, 64cac72, 28020ae, 27d2b1f, b4821f3, 2a28615, 8a6c48e.

import os

import torch
from flask import Flask, request, jsonify
from langchain_community.llms import LlamaCpp
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import AutoTokenizer, AutoModel
app = Flask(__name__)

# Single source of truth for the GGUF weights, so the loader and the size
# report at the end of this section cannot drift apart.
MODEL_PATH = "Phi-3-mini-4k-instruct-q4.gguf"

# Values forwarded to LlamaCpp below.
n_gpu_layers = 0
n_batch = 1024


# Text-generation model used by the /cv endpoint.
llm = LlamaCpp(
    model_path=MODEL_PATH,  # path to GGUF file
    temperature=0.1,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    verbose=True,
    n_ctx=4096,
)

# NOTE(review): model0 is not referenced by any code visible in this file and
# loads the same checkpoint name that `model` uses below. It is kept so the
# module-level name keeps existing -- confirm whether it can be dropped.
model0 = AutoModel.from_pretrained('sentence-transformers/paraphrase-TinyBERT-L6-v2')

# Sentence encoder used by the /compare endpoint.
model = SentenceTransformer('sentence-transformers/paraphrase-TinyBERT-L6-v2')

# Report the on-disk size of the LLM weights at startup.
# `os` is provided by the file-level imports.
file_size = os.stat(MODEL_PATH)
print("model size ====> :", file_size.st_size, "bytes")


@app.route('/cv', methods=['POST'])
def get_skills():
    """Ask the LLM to list the skills mentioned in a CV.

    Expects a JSON object body with a non-empty string field ``cv_body``.

    Returns:
        A JSON response ``{'skills': <text returned by the LLM>}``, or a
        400 response ``{'error': ...}`` when ``cv_body`` is missing, blank,
        or not a string.
    """
    payload = request.get_json(silent=True)
    cv_body = payload.get('cv_body') if isinstance(payload, dict) else None
    if not isinstance(cv_body, str) or not cv_body.strip():
        # Without this guard a missing field would be formatted into the
        # prompt as the literal text "None".
        return jsonify({'error': 'cv_body must be a non-empty string'}), 400

    # Phi-3 chat layout: the instruction and the CV together form the user
    # turn, and the prompt ends on an open assistant tag so the model writes
    # the answer instead of continuing an already-closed assistant turn.
    prompt = (
        "<|user|>\n"
        "Can you list the skills mentioned in the CV?\n"
        f"{cv_body}<|end|>\n"
        "<|assistant|>"
    )

    # invoke() is the supported entry point; calling the LLM object directly
    # is deprecated in LangChain. Extra keyword arguments are forwarded to
    # the underlying model call.
    output = llm.invoke(
        prompt,
        stop=["<|end|>"],
        max_tokens=256,  # Generate up to 256 tokens
        echo=True,  # Whether to echo the prompt
    )

    return jsonify({'skills': output})

@app.get('/')
def health():
    """Health-check endpoint: reply with a fixed JSON status payload."""
    status_payload = {'status': 'Worked'}
    return jsonify(status_payload)

# POST endpoint that compares one employee skills text against a list of job
# skills texts and returns the job whose sentence embedding is most similar
# to the employee's (cosine similarity).
@app.route('/compare', methods=['POST'])
def compare():
    """Return the job skills text most similar to the employee's skills.

    Expects a JSON object body with:
        jobs_skills: non-empty list of strings, one skills text per job.
        employee_skills: a single string with the employee's skills.

    Returns:
        A JSON response ``{'job': <best matching entry of jobs_skills>,
        'similarity': <its cosine similarity as a float>}``, or a 400
        response ``{'error': ...}`` when the input is malformed.
    """
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({'error': 'request body must be a JSON object'}), 400

    jobs_skills = payload.get('jobs_skills')
    employee_skills = payload.get('employee_skills')

    # Bad client input is reported as 400 rather than raised, which Flask
    # would surface as an opaque 500.
    if not isinstance(jobs_skills, list) or not all(isinstance(skill, str) for skill in jobs_skills):
        return jsonify({'error': 'jobs_skills must be a list of strings'}), 400
    if not jobs_skills:
        # An empty list has no best match to report.
        return jsonify({'error': 'jobs_skills must not be empty'}), 400
    if not isinstance(employee_skills, str):
        return jsonify({'error': 'employee_skills must be a string'}), 400

    # Encode all jobs in one batch; passing lists yields one embedding row
    # per input text, which is the 2-D layout cosine_similarity expects.
    job_embeddings = model.encode(jobs_skills)
    employee_embedding = model.encode([employee_skills])

    # Row 0 holds the employee's similarity to every job, in input order.
    scores = cosine_similarity(employee_embedding, job_embeddings)[0]

    # argmax returns the first maximum, so ties resolve to the earliest job.
    best_index = int(scores.argmax())

    return jsonify({
        'job': jobs_skills[best_index],
        'similarity': float(scores[best_index]),
    })

if __name__ == '__main__':
    # Start Flask's built-in server only when this file is run as a script.
    app.run()