# Hugging Face Spaces status: Running
import functools
import pickle
import re

import faiss
import numpy as np
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
from sentence_transformers import SentenceTransformer
def load_faiss_index(index_path="faiss_index/faiss_index.faiss", doc_path="faiss_index/documents.pkl"):
    """Load the FAISS index and the pickled documents from disk.

    Args:
        index_path: Path of the serialized FAISS index file.
        doc_path: Path of the pickle file holding the documents.

    Returns:
        A ``(index, documents)`` tuple.
    """
    # The index is read first so a missing index file fails before the
    # pickle file is opened.
    faiss_index = faiss.read_index(index_path)
    # NOTE(review): unpickling can execute arbitrary code; doc_path must only
    # ever point at a file produced by a trusted pipeline.
    with open(doc_path, "rb") as handle:
        return faiss_index, pickle.load(handle)
def get_embedding_model():
    """Instantiate the sentence-embedding model.

    Returns:
        A new ``SentenceTransformer`` built from the
        ``sentence-transformers/multi-qa-MiniLM-L6-cos-v1`` checkpoint.
    """
    checkpoint = "sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
    return SentenceTransformer(checkpoint)
def query_index(question, index, documents, model, k=3):
    """Return the documents whose embeddings are nearest to *question*.

    Args:
        question: Text to look up.
        index: Object exposing ``search(vectors, k)`` (a FAISS index).
        documents: Sequence indexed by the labels that ``index.search``
            returns.
        model: Object exposing ``encode(list_of_texts)``.
        k: Maximum number of neighbours to request.

    Returns:
        Up to *k* documents, in the order the index returned them. Fewer
        than *k* are returned when the index holds fewer vectors.
    """
    question_embedding = model.encode([question])
    # The query is passed to the index as a float32 array.
    query = np.asarray(question_embedding, dtype="float32")
    _, indices = index.search(query, k)
    # FAISS pads the label row with -1 when fewer than k neighbours exist.
    # Indexing documents[-1] would silently return the last document as a
    # bogus hit, so negative labels are dropped.
    return [documents[i] for i in indices[0] if i >= 0]
def nettoyer_context(context):
    """Clean a context string before it is inserted into a prompt.

    Two passes are applied, in this order:

    1. every ``['...']`` wrapper is replaced by the text it encloses;
    2. every occurrence of the substring ``None`` is removed.

    Args:
        context: Raw context text.

    Returns:
        The cleaned text.
    """
    # The lazy group stops at the first "']" that follows an opening "['".
    return re.sub(r"\[\'(.*?)\'\]", r"\1", context).replace("None", "")
@functools.lru_cache(maxsize=1)
def _load_llm():
    """Fetch the GGUF weights and build the Llama model once per process."""
    model_file = hf_hub_download(
        repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
        filename="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
    )
    return Llama(
        model_path=model_file,
        n_ctx=2048,
        n_threads=6,
        verbose=False,
    )


def generate_answer(question, context):
    """Generate an answer to *question* from the supplied *context*.

    The model is loaded on the first call and reused afterwards; building a
    new ``Llama`` for every question reloaded the whole model each time.

    Args:
        question: The user's question, inserted verbatim into the prompt.
        context: Background text, inserted verbatim into the prompt.

    Returns:
        The text of the first completion choice, stripped of surrounding
        whitespace.
    """
    # NOTE(review): the cached model is shared by all callers; confirm the
    # serving layer does not invoke this function concurrently.
    llm = _load_llm()
    prompt = f"""Voici des informations sur des établissements et formations :
{context}
Formule ta réponse comme un conseiller d’orientation bienveillant, de manière fluide et naturelle, sans énumérations brutes.
Question : {question}
Réponse :
"""
    # Generation is capped at 128 tokens and stops at the "</s>" marker.
    output = llm(prompt, max_tokens=128, stop=["</s>"])
    return output["choices"][0]["text"].strip()