File size: 2,307 Bytes
db7aed9
 
 
 
64cac72
 
db7aed9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28020ae
 
db7aed9
 
 
 
 
 
28020ae
 
 
db7aed9
28020ae
 
 
 
 
 
 
 
 
 
 
 
 
db7aed9
27d2b1f
 
28020ae
 
dc11f1f
28020ae
dc11f1f
b4821f3
27d2b1f
b4821f3
db7aed9
 
2534516
2a28615
db7aed9
8a6c48e
2716c13
db7aed9
2716c13
 
 
 
 
 
 
 
db7aed9
28020ae
 
8a6c48e
db7aed9
28020ae
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
from flask import Flask, request, jsonify
from langchain_community.llms import LlamaCpp
from sentence_transformers import SentenceTransformer
from transformers import AutoModel
import torch
from torch.nn.functional import cosine_similarity
import os 

# ---------------------------------------------------------------------------
# Application and model setup. Everything below runs once, at import time.
# ---------------------------------------------------------------------------

# Files / model identifiers shared by the loaders below.
GGUF_MODEL_PATH = "Phi-3-mini-4k-instruct-q4.gguf"
EMBEDDING_MODEL_NAME = 'sentence-transformers/paraphrase-TinyBERT-L6-v2'

app = Flask(__name__)

# Values forwarded to LlamaCpp under the same keyword names.
n_gpu_layers = 0
n_batch = 1024

# Text-generation model used by the /cv endpoint.
llm = LlamaCpp(
    model_path=GGUF_MODEL_PATH,
    n_ctx=4096,
    n_batch=n_batch,
    n_gpu_layers=n_gpu_layers,
    temperature=0.1,
    verbose=True,
)

# NOTE(review): model0 is not referenced anywhere else in the visible part of
# this file; it loads the same checkpoint name as `model` below. Confirm
# whether it is still needed.
model0 = AutoModel.from_pretrained(EMBEDDING_MODEL_NAME)

# Sentence-embedding model used by the /compare endpoint.
model = SentenceTransformer(EMBEDDING_MODEL_NAME)

# Report the on-disk size of the GGUF file on startup.
file_size = os.stat(GGUF_MODEL_PATH)
print("model size ====> :", file_size.st_size, "bytes")


@app.route('/cv', methods=['POST'])
def get_skills():
    """Ask the local LLM to list the skills mentioned in a CV.

    Expects a JSON object body with a ``cv_body`` string.

    Returns:
        A JSON response ``{"skills": <text>}`` holding whatever ``llm``
        returns for the prompt (the call requests ``echo=True``), or
        ``{"error": <message>}`` with HTTP status 400 when ``cv_body`` is
        missing, is not a string, or is blank.
    """
    # silent=True yields None instead of raising on a non-JSON body, so every
    # malformed request is reported through the same 400 path below.
    payload = request.get_json(silent=True)
    cv_body = payload.get('cv_body') if isinstance(payload, dict) else None
    if not isinstance(cv_body, str) or not cv_body.strip():
        return jsonify({'error': 'cv_body must be a non-empty string'}), 400

    # Phi-3 chat layout: the CV and the instruction both belong to the user
    # turn, and the prompt must end on an *open* assistant turn so that the
    # model's completion is the answer. Placing the question inside a closed
    # assistant turn leaves nothing for the model to answer.
    prompt = (
        f"<|user|>\n{cv_body}\n"
        "Can you list the skills mentioned in the CV?<|end|>\n"
        "<|assistant|>"
    )

    output = llm(
        prompt,
        max_tokens=256,  # Generate up to 256 tokens
        stop=["<|end|>"],
        echo=True,  # Whether to echo the prompt
    )

    return jsonify({'skills': output})

@app.get('/')
def health():
    """Health-check endpoint: always answers with a fixed status payload."""
    status_payload = {'status': 'Worked'}
    return jsonify(status_payload)

def _is_nonempty_str_list(value):
    """Return True when *value* is a non-empty list containing only strings."""
    return (
        isinstance(value, list)
        and bool(value)
        and all(isinstance(item, str) for item in value)
    )


@app.route('/compare', methods=['POST'])
def compare():
    """Score every job skill against the employee's skills.

    Expects a JSON object body with:

    * ``jobs_skills``: a non-empty list of strings.
    * ``employee_skills``: a non-empty list of strings, or a single string
      (treated as a one-element list).

    Returns:
        A JSON list with one entry per job skill, in request order::

            {"job": <job skill>, "similarity_scores": [<float>, ...]}

        ``similarity_scores`` holds the cosine similarity between that job
        skill's embedding and each employee skill's embedding, in the order
        the employee skills were given. On invalid input the response is
        ``{"error": <message>}`` with HTTP status 400.
    """
    # silent=True yields None instead of raising on a non-JSON body.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({'error': 'request body must be a JSON object'}), 400

    # Each variable is read from the payload key of the same name.
    jobs_skills = payload.get('jobs_skills')
    employee_skills = payload.get('employee_skills')

    # Normalise a lone string so the embeddings below are always 2-D.
    if isinstance(employee_skills, str):
        employee_skills = [employee_skills]

    # Validation
    if not _is_nonempty_str_list(jobs_skills):
        return jsonify(
            {'error': 'jobs_skills must be a non-empty list of strings'}
        ), 400
    if not _is_nonempty_str_list(employee_skills):
        return jsonify(
            {'error': 'employee_skills must be a string or a non-empty '
                      'list of strings'}
        ), 400

    # Encoding skills into embeddings: one row per input string.
    job_embeddings = torch.from_numpy(model.encode(jobs_skills))
    employee_embeddings = torch.from_numpy(model.encode(employee_skills))

    similarity_scores = []
    for job_skill, job_vector in zip(jobs_skills, job_embeddings):
        # Repeat the job vector once per employee row and reduce over the
        # embedding axis (dim=1), giving exactly one score per employee skill.
        scores = cosine_similarity(
            employee_embeddings,
            job_vector.expand_as(employee_embeddings),
            dim=1,
        )
        similarity_scores.append(
            {"job": job_skill, "similarity_scores": scores.tolist()}
        )

    return jsonify(similarity_scores)




# Script entry point: start the Flask app only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    app.run()