"""Flask service exposing CV skill extraction and CV-to-job matching.

Endpoints:
    POST /cv       -- ask the local Phi-3 model to list the skills in a CV.
    GET  /         -- health check.
    POST /compare  -- match each employee skill text to the most similar job
                      skill text using sentence-embedding cosine similarity.
"""
import os

import torch  # kept: file-level import retained although no longer used below
from flask import Flask, request, jsonify
from langchain_community.llms import LlamaCpp
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import AutoTokenizer, AutoModel

app = Flask(__name__)

# Single source of truth for the GGUF file used by both the loader and the
# size report below.
MODEL_PATH = "Phi-3-mini-4k-instruct-q4.gguf"
EMBEDDING_MODEL_NAME = 'sentence-transformers/paraphrase-TinyBERT-L6-v2'

n_gpu_layers = 0
n_batch = 1024

llm = LlamaCpp(
    model_path=MODEL_PATH,  # path to GGUF file
    temperature=0.1,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    verbose=True,
    n_ctx=4096
)

# NOTE(review): model0 is not referenced anywhere in this file and loads the
# same checkpoint a second time; kept only so the module-level name survives.
# Confirm no other module imports it, then remove.
model0 = AutoModel.from_pretrained(EMBEDDING_MODEL_NAME)
model = SentenceTransformer(EMBEDDING_MODEL_NAME)

file_size = os.stat(MODEL_PATH)
print("model size ====> :", file_size.st_size, "bytes")


def _read_json_object():
    """Return the request body as a dict, or None if it is not a JSON object."""
    # silent=True turns a missing/invalid JSON body into None instead of an
    # exception, so the routes can answer with their own 400 message.
    payload = request.get_json(silent=True)
    return payload if isinstance(payload, dict) else None


def _is_list_of_str(value):
    """Return True when *value* is a list whose items are all strings."""
    return isinstance(value, list) and all(isinstance(item, str) for item in value)


@app.route('/cv', methods=['POST'])
def get_skills():
    """Ask the LLM to list the skills mentioned in a CV.

    Expects a JSON object with a non-empty string field ``cv_body``.

    Returns:
        200 with ``{"skills": <model output>}``; because ``echo=True`` is
        passed through, the output is expected to include the prompt text.
        400 with ``{"error": ...}`` when ``cv_body`` is missing or not a
        non-empty string.
    """
    payload = _read_json_object()
    cv_body = payload.get('cv_body') if payload is not None else None
    if not isinstance(cv_body, str) or not cv_body.strip():
        return jsonify({"error": "cv_body must be a non-empty string"}), 400

    # Phi-3 chat format: the whole request (instruction + CV) belongs to the
    # user turn, and the assistant turn is left open for the model to fill.
    prompt = (
        "<|user|>\n"
        "Can you list the skills mentioned in the CV?\n"
        f"{cv_body}<|end|>\n"
        "<|assistant|>"
    )
    output = llm.invoke(
        prompt,
        max_tokens=256,  # Generate up to 256 tokens
        stop=["<|end|>"],
        echo=True,  # Whether to echo the prompt
    )
    return jsonify({'skills': output})


@app.get('/')
def health():
    """Health check endpoint."""
    return jsonify({'status': 'Worked'})


@app.route('/compare', methods=['POST'])
def compare():
    """Match every employee skill text to its most similar job skill text.

    Expects a JSON object with two lists of strings: ``jobs_skills`` (one
    text per job) and ``employees_skills`` (one text per employee). Both are
    embedded with the sentence-embedding model (not the LLM) and compared by
    cosine similarity.

    Returns:
        200 with ``{"employee_<n>": {"job": <best job text>,
        "similarities": {<job text>: <score>, ...}}, ...}`` where ``n`` is
        the 1-based position in ``employees_skills``. An empty
        ``employees_skills`` yields ``{}``.
        400 with ``{"error": ...}`` on malformed input or when there are
        employees to match but no jobs.
    """
    data = _read_json_object()
    if data is None:
        return jsonify({"error": "request body must be a JSON object"}), 400

    jobs_skills = data.get('jobs_skills')
    employees_skills = data.get('employees_skills')

    # Validate input
    if not _is_list_of_str(jobs_skills):
        return jsonify({"error": "jobs_skills must be a list of strings"}), 400
    if not _is_list_of_str(employees_skills):
        return jsonify({"error": "employees_skills must be a list of strings"}), 400

    # Nothing to match: same empty result the per-employee loop used to give.
    if not employees_skills:
        return jsonify({})
    # cosine_similarity cannot run against zero job vectors; reject up front
    # instead of failing with a 500.
    if not jobs_skills:
        return jsonify({"error": "jobs_skills must not be empty"}), 400

    # Encode each list in one batched call; encode() returns one row per
    # input text, which cosine_similarity accepts directly.
    job_embeddings = model.encode(jobs_skills)
    employee_embeddings = model.encode(employees_skills)

    # similarity_matrix[i][j] = similarity of employee i to job j
    similarity_matrix = cosine_similarity(employee_embeddings, job_embeddings)
    best_job_indices = similarity_matrix.argmax(axis=1)

    all_similarities = {}
    for idx, (scores, best_index) in enumerate(
        zip(similarity_matrix, best_job_indices), start=1
    ):
        all_similarities[f'employee_{idx}'] = {
            'job': jobs_skills[int(best_index)],
            # Convert numpy floats to plain floats for JSON serialization.
            # Duplicate job texts share one key, as before.
            'similarities': {
                job: float(score) for job, score in zip(jobs_skills, scores)
            },
        }

    return jsonify(all_similarities)


if __name__ == '__main__':
    app.run()