Spaces:

Programmes
/

E_P

Running

E_P

File size: 1,600 Bytes

4b4260f
 
 
c1ff486
87392ed
ba275b2
04cd854
4b4260f
3b93dfc
4b4260f
 
 
 
 
 
87392ed
4b4260f
 
 
 
87392ed
4b4260f
c1ff486
04cd854
 
c1ff486
 
4b4260f
ba275b2
 
 
 
 
04cd854
ba275b2
859e59f
04cd854
 
 
87392ed
ba275b2
87392ed
c1ff486
da32198
affd798
c1ff486
 
 
 
 
b8be8c0
04cd854

import functools
import pickle
import re

import faiss
import numpy as np
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
from sentence_transformers import SentenceTransformer

def load_faiss_index(index_path="faiss_index/faiss_index.faiss", doc_path="faiss_index/documents.pkl"):
    """Load the FAISS index and its companion pickled document collection.

    Args:
        index_path: Path handed to ``faiss.read_index``.
        doc_path: Path of the pickle file that holds the documents.

    Returns:
        A ``(index, documents)`` tuple.
    """
    # The index is read first, then the documents, so a missing index file
    # is reported before a missing pickle file.
    faiss_index = faiss.read_index(index_path)
    # NOTE(review): pickle executes arbitrary code on load; this presumes the
    # file is a trusted artefact shipped with the app -- confirm.
    with open(doc_path, "rb") as doc_file:
        return faiss_index, pickle.load(doc_file)

def get_embedding_model():
    """Build the sentence-embedding model used to encode questions.

    Returns:
        A ``SentenceTransformer`` created from the
        ``multi-qa-MiniLM-L6-cos-v1`` checkpoint name.
    """
    model_name = "sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
    return SentenceTransformer(model_name)

def query_index(question, index, documents, model, k=3):
    """Return the documents closest to *question* according to *index*.

    Args:
        question: Query text; it is passed to ``model.encode`` as a
            one-element list.
        index: Object exposing ``search(vectors, k)`` that returns a
            ``(distances, indices)`` pair (a FAISS index in this file).
        documents: Sequence addressed by the positions the search returns.
        model: Object exposing ``encode(list_of_texts)``.
        k: Maximum number of neighbours to request.

    Returns:
        A list of at most ``k`` documents, in the order the search
        returned them.

    Raises:
        IndexError: If the search returns a non-negative position that is
            outside ``documents``.
    """
    question_embedding = model.encode([question])
    _, indices = index.search(np.array(question_embedding).astype("float32"), k)
    # FAISS fills the result row with -1 when fewer than k neighbours exist.
    # Indexing with -1 would silently return the LAST document as a bogus
    # match, so negative positions are skipped.
    return [documents[i] for i in indices[0] if i >= 0]

def nettoyer_context(context):
    """Clean a retrieved context string before it is placed in a prompt.

    Two passes are applied in order:

    1. Every ``['...']`` wrapper on a single line is replaced by the text it
       encloses (non-greedy match).
    2. Every occurrence of the substring ``None`` is removed.

    Args:
        context: Raw context text.

    Returns:
        The cleaned text.
    """
    unwrapped = re.compile(r"\[\'(.*?)\'\]").sub(r"\1", context)
    # Splitting on the marker and re-joining drops each occurrence of it.
    return "".join(unwrapped.split("None"))

@functools.lru_cache(maxsize=1)
def _load_llm():
    """Resolve the Mistral GGUF file and build the ``Llama`` instance once.

    The result is memoized so that repeated calls to ``generate_answer``
    reuse the same loaded model instead of constructing a new one per
    question.
    """
    model_file = hf_hub_download(
        repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
        filename="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
    )
    return Llama(
        model_path=model_file,
        n_ctx=2048,
        n_threads=6,
        verbose=False,
    )


def generate_answer(question, context):
    """Generate an answer to *question* from the retrieved *context*.

    Args:
        question: The user's question, inserted verbatim into the prompt.
        context: Background text about establishments and programmes,
            inserted verbatim into the prompt.

    Returns:
        The text of the first completion choice, stripped of surrounding
        whitespace. Generation is capped at 128 tokens and stops at
        ``</s>``.
    """
    # The model is loaded lazily on the first call and shared afterwards.
    llm = _load_llm()

    prompt = f"""Voici des informations sur des établissements et formations :

{context}

Formule ta réponse comme un conseiller d’orientation bienveillant, de manière fluide et naturelle, sans énumérations brutes.

Question : {question}
Réponse :
"""

    output = llm(prompt, max_tokens=128, stop=["</s>"])
    return output["choices"][0]["text"].strip()