# --- Skills-extraction service (Flask + LlamaCpp) ---
from flask import Flask, request, jsonify
from langchain_community.llms import LlamaCpp
import os

app = Flask(__name__)
n_gpu_layers = 0  # CPU-only; raise to offload layers to a GPU
n_batch = 1024    # tokens processed in parallel per batch

llm = LlamaCpp(
    model_path="Phi-3-mini-4k-instruct-q4.gguf",  # path to GGUF file
    temperature=0.1,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    verbose=True,
    n_ctx=4096,  # context window matching the 4k model variant
)

model_stat = os.stat("Phi-3-mini-4k-instruct-q4.gguf")
print("model size ====> :", model_stat.st_size, "bytes")
@app.route('/get_skills', methods=['POST'])  # route path is an assumption; adjust to your deployment
def get_skills():
    cv_body = request.json.get('cv_body')
    # Simple inference example: the question goes in the user turn, and the
    # trailing <|assistant|> tag cues the model to generate the answer.
    output = llm(
        f"<|user|>\nCan you list the skills mentioned in the CV?\n{cv_body}<|end|>\n<|assistant|>",
        max_tokens=256,    # generate up to 256 tokens
        stop=["<|end|>"],  # stop at the end-of-turn token
        echo=True,         # echo the prompt back in the output
    )
    return jsonify({'skills': output})
if __name__ == '__main__':
    app.run()
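
A minimal client sketch for exercising the service above, assuming it runs locally on Flask's default port (5000) and uses the /get_skills route name chosen above:

# Hypothetical client; the URL, port, and route are assumptions.
import requests

cv_text = "Experienced Python developer with Flask, Docker, and SQL skills."
resp = requests.post(
    "http://127.0.0.1:5000/get_skills",
    json={"cv_body": cv_text},
)
print(resp.json()["skills"])  # raw completion text from the model
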
# --- Text-similarity service (Flask + Word2Vec + cosine similarity) ---
from flask import Flask, request, jsonify
import nltk
from gensim.models import Word2Vec
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib
matplotlib.use('Agg')  # headless backend so plotting works inside a web server
import matplotlib.pyplot as plt
import io
import base64

nltk.download('punkt')  # tokenizer data used by nltk.word_tokenize

app = Flask(__name__)
# Toy corpus; replace with real documents for meaningful comparisons
texts = [
    "This is a sample text.",
    "Another example of text.",
    "More texts to compare."
]

tokenized_texts = [nltk.word_tokenize(text.lower()) for text in texts]
word_embeddings_model = Word2Vec(
    sentences=tokenized_texts, vector_size=100, window=5, min_count=1, workers=4
)
def text_embedding(text):
    """Mean-pool the Word2Vec vectors of a text's in-vocabulary tokens."""
    words = nltk.word_tokenize(text.lower())
    embeddings = [word_embeddings_model.wv[word] for word in words if word in word_embeddings_model.wv]
    if embeddings:
        return np.mean(embeddings, axis=0)
    return np.zeros(word_embeddings_model.vector_size)
@app.route('/process', methods=['POST'])  # route path is an assumption; adjust to your deployment
def process():
    data = request.get_json()
    input_text = data.get('input_text', '')
    if not input_text:
        return jsonify({'error': 'No input text provided'}), 400

    # Embed the input and the corpus, then score with cosine similarity
    input_embedding = text_embedding(input_text)
    text_embeddings = [text_embedding(text) for text in texts]
    similarities = cosine_similarity([input_embedding], text_embeddings).flatten()
    # Cast to plain floats so jsonify can serialize them
    similarities_percentages = [float(similarity) * 100 for similarity in similarities]

    # Render a bar chart of the similarity scores
    fig, ax = plt.subplots(figsize=(10, 6))
    texts_for_plotting = [f"Text {i+1}" for i in range(len(texts))]
    ax.bar(texts_for_plotting, similarities_percentages)
    ax.set_ylabel('Similarity (%)')
    ax.set_xlabel('Texts')
    ax.set_title('Similarity of Input Text with other texts')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()

    # Encode the figure as a base64 PNG so it can travel in the JSON response
    buf = io.BytesIO()
    plt.savefig(buf, format='png')
    buf.seek(0)
    img_base64 = base64.b64encode(buf.read()).decode('utf-8')
    plt.close(fig)

    # Rank corpus texts by similarity and keep the top three
    sorted_indices = np.argsort(similarities)[::-1]
    similar_texts = [(float(similarities[idx]) * 100, texts[idx]) for idx in sorted_indices[:3]]

    response = {
        'similarities': similarities_percentages,
        'plot': img_base64,
        'most_similar_texts': similar_texts
    }
    return jsonify(response)
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8080, debug=True)  # debug=True is for local development only
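
And a matching client sketch for the similarity service, assuming it is reachable on port 8080 with the /process route name used above:

# Hypothetical client; the URL, port, and route are assumptions.
import base64
import requests

resp = requests.post(
    "http://127.0.0.1:8080/process",
    json={"input_text": "A sample text for comparison."},
)
data = resp.json()
print(data["most_similar_texts"])  # top-3 (similarity %, text) pairs
with open("similarity_plot.png", "wb") as f:
    f.write(base64.b64decode(data["plot"]))  # decode and save the bar chart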