Spaces:
Running
Running
File size: 1,600 Bytes
4b4260f c1ff486 87392ed ba275b2 04cd854 4b4260f 3b93dfc 4b4260f 87392ed 4b4260f 87392ed 4b4260f c1ff486 04cd854 c1ff486 4b4260f ba275b2 04cd854 ba275b2 859e59f 04cd854 87392ed ba275b2 87392ed c1ff486 da32198 affd798 c1ff486 b8be8c0 04cd854 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
import faiss
import pickle
import numpy as np
import re
from sentence_transformers import SentenceTransformer
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
def load_faiss_index(index_path="faiss_index/faiss_index.faiss", doc_path="faiss_index/documents.pkl"):
    """Load the FAISS index and the pickled documents that accompany it.

    Args:
        index_path: Path of the serialized FAISS index file.
        doc_path: Path of the pickle file holding the documents.

    Returns:
        A ``(index, documents)`` tuple.
    """
    faiss_index = faiss.read_index(index_path)

    # NOTE(review): pickle can execute arbitrary code while loading; only
    # point doc_path at files produced by a trusted source.
    with open(doc_path, "rb") as handle:
        stored_documents = pickle.load(handle)

    return faiss_index, stored_documents
def get_embedding_model():
    """Build and return the SentenceTransformer used to embed questions."""
    model_name = "sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
    embedder = SentenceTransformer(model_name)
    return embedder
def query_index(question, index, documents, model, k=3):
    """Return the documents closest to *question* according to the index.

    Args:
        question: The question text to embed and search for.
        index: Object exposing ``search(vectors, k)`` that returns a
            ``(distances, indices)`` pair (e.g. a FAISS index).
        documents: Collection indexed by the positions the index returns.
        model: Object exposing ``encode(list_of_texts)``.
        k: Maximum number of neighbours to request.

    Returns:
        A list of at most ``k`` documents, in the order the index ranked them.
    """
    question_embedding = model.encode([question])
    query_vectors = np.array(question_embedding).astype("float32")
    _, indices = index.search(query_vectors, k)

    # FAISS fills missing neighbours with the label -1 when fewer than k
    # vectors are available; without this filter documents[-1] would quietly
    # return the last document as if it were a real match.
    return [documents[i] for i in indices[0] if i >= 0]
def nettoyer_context(context):
    """Clean a context string before it is inserted into the prompt.

    Each ``['...']`` wrapper is reduced to its inner text, then every
    occurrence of the literal text ``None`` is removed.

    Args:
        context: The raw context string.

    Returns:
        The cleaned string.
    """
    unwrapped = re.sub(r"\[\'(.*?)\'\]", r"\1", context)
    return unwrapped.replace("None", "")
# Lazily-created Llama model shared by every generate_answer() call.
_LLM_INSTANCE = None


def _get_llm():
    """Return the shared Llama model, building it on the first call only."""
    global _LLM_INSTANCE
    if _LLM_INSTANCE is None:
        model_file = hf_hub_download(
            repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
            filename="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
        )
        _LLM_INSTANCE = Llama(
            model_path=model_file,
            n_ctx=2048,
            n_threads=6,
            verbose=False,
        )
    return _LLM_INSTANCE


def generate_answer(question, context):
    """Generate an answer to *question* from the supplied *context*.

    The model is obtained through ``_get_llm()`` so that the GGUF file is
    resolved and loaded once per process instead of once per question.

    Args:
        question: The user's question, inserted verbatim into the prompt.
        context: Text describing establishments and programmes, inserted
            verbatim into the prompt.

    Returns:
        The text of the first completion choice, stripped of surrounding
        whitespace.
    """
    llm = _get_llm()

    # The prompt lines are kept at column 0 so the text sent to the model
    # carries no leading indentation.
    prompt = f"""Voici des informations sur des établissements et formations :
{context}
Formule ta réponse comme un conseiller d’orientation bienveillant, de manière fluide et naturelle, sans énumérations brutes.
Question : {question}
Réponse :
"""
    # NOTE(review): the prompt plus max_tokens has to fit in the n_ctx=2048
    # window configured above; long contexts are not truncated here.
    output = llm(prompt, max_tokens=128, stop=["</s>"])
    return output["choices"][0]["text"].strip()
|