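"""Semantic cache for a LangChain retriever.

Incoming queries are embedded with a SentenceTransformer model and stored in
a FAISS index. When a new query lands within a Euclidean-distance threshold
of a previously seen one, the cached documents are returned; otherwise the
wrapped retriever is queried and its result is cached to disk as JSON.
"""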
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
import time
import json

from langchain_core.documents import Document

def init_cache():
    # Flat (exhaustive) L2 index; 1024 is the embedding size of
    # intfloat/multilingual-e5-large. Flat indexes need no training.
    index = faiss.IndexFlatL2(1024)
    if index.is_trained:
        print("Index trained")

    # Initialize the Sentence Transformer model used to embed queries
    encoder = SentenceTransformer("intfloat/multilingual-e5-large")

    return index, encoder

def retrieve_cache(json_file):
    # Load a previously persisted cache, or start an empty one on first run
    try:
        with open(json_file, "r") as file:
            cache = json.load(file)
    except FileNotFoundError:
        cache = {"query": [], "embeddings": [], "answers": []}

    return cache

def store_cache(json_file, cache):
    # Persist the cache to disk as JSON
    with open(json_file, "w") as file:
        json.dump(cache, file)

class SemanticCache:
    def __init__(self, retriever, json_file="cache_file.json", threshold=0.35):
        # Wrap the underlying retriever and initialize the FAISS index
        # (Euclidean distance) plus the query encoder
        self.retriever = retriever
        self.index, self.encoder = init_cache()

        # Euclidean distance threshold:
        # a distance of 0 means identical sentences.
        # We only answer from the cache for queries under this threshold.
        self.euclidean_threshold = threshold

        self.json_file = json_file
        self.cache = retrieve_cache(self.json_file)

        # Re-add persisted embeddings so FAISS row ids stay aligned with
        # the positions in the cache lists across restarts
        if self.cache["embeddings"]:
            self.index.add(np.array(self.cache["embeddings"], dtype="float32"))

    def query_database(self, query_text):
        # Cache-miss path: delegate to the wrapped retriever
        results = self.retriever.get_relevant_documents(query_text)
        return results

    def get_relevant_documents(self, query: str, use_cache=True) -> list:
        # Return documents from the cache when a semantically similar query
        # was seen before; otherwise query the retriever and cache the result.
        start_time = time.time()
        try:
            # First we obtain the embedding corresponding to the user query
            embedding = self.encoder.encode([query])

            # Search for the nearest neighbor in the index. IndexFlatL2
            # searches exhaustively, so no nprobe tuning is needed;
            # I[0][0] is -1 when the index is still empty.
            D, I = self.index.search(embedding, 1)

            if use_cache and I[0][0] >= 0 and D[0][0] <= self.euclidean_threshold:
                row_id = int(I[0][0])

                print("Answer recovered from cache.")
                print(f"{D[0][0]:.3f} smaller than {self.euclidean_threshold}")
                print(f"Found cache in row: {row_id} with score {D[0][0]:.3f}")

                elapsed_time = time.time() - start_time
                print(f"Time taken: {elapsed_time:.3f} seconds")
                return [Document(**doc) for doc in self.cache["answers"][row_id]]

            # No neighbor within the Euclidean threshold (or caching disabled):
            # fall back to the underlying retriever
            answer = self.query_database(query)

            self.cache["query"].append(query)
            self.cache["embeddings"].append(embedding[0].tolist())
            # Store documents as plain dicts so they survive the JSON round-trip
            self.cache["answers"].append([doc.__dict__ for doc in answer])

            self.index.add(embedding)
            store_cache(self.json_file, self.cache)
            elapsed_time = time.time() - start_time
            print(f"Time taken: {elapsed_time:.3f} seconds")

            return answer
        except Exception as e:
            raise RuntimeError(f"Error during 'get_relevant_documents' method: {e}") from e
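

# Minimal usage sketch (illustrative, not part of the original module).
# `ToyRetriever` is a hypothetical stand-in: any object exposing
# `get_relevant_documents(query)` and returning LangChain `Document`s
# (e.g. a Chroma or FAISS vector-store retriever) works in its place.
if __name__ == "__main__":
    class ToyRetriever:
        def get_relevant_documents(self, query):
            return [Document(page_content="Paris is the capital of France.")]

    semantic_cache = SemanticCache(retriever=ToyRetriever())

    # First call misses the cache and hits the retriever
    docs = semantic_cache.get_relevant_documents("What is the capital of France?")
    print(docs[0].page_content)

    # A paraphrase within the distance threshold is served from the cache
    docs = semantic_cache.get_relevant_documents("Which city is the capital of France?")
    print(docs[0].page_content)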