import os
import zipfile

from huggingface_hub import hf_hub_download
from sentence_transformers import SentenceTransformer
from langchain_chroma import Chroma
import torch
import gradio as gr


class SentenceTransformerWrapper:
    """Minimal embedding wrapper exposing the embed_documents/embed_query interface Chroma expects."""

    def __init__(self, model_name):
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, texts):
        # Convert a list of texts to embeddings
        return self.model.encode(texts, show_progress_bar=True).tolist()

    def embed_query(self, text):
        # Convert a single query to its embedding
        return self.model.encode(text).tolist()


persist_directory = "chroma_db"
embedding_model = SentenceTransformerWrapper("intfloat/e5-base-v2")

# Load the persisted Chroma collection with the same embedding model used to build it
vector_db = Chroma(
    persist_directory=persist_directory,
    embedding_function=embedding_model,
)


def retrieve_info(query, k=5):
    # Gradio's Number component passes a float, so cast k before querying Chroma
    results = vector_db.similarity_search(query, k=int(k))
    output = ""
    for i, doc in enumerate(results):
        output += (
            f"Result {i + 1}:\n"
            f"  Metadata: {doc.metadata}\n"
            f"  Content: {doc.page_content[:1000]}\n\n"
        )
    return output


demo = gr.Interface(
    fn=retrieve_info,
    inputs=[
        "text",
        gr.Number(label="k (Number of chunks to retrieve)", value=5),
    ],
    outputs=[
        gr.Textbox(label="Output from DenseRetriever", lines=25),
    ],
)

# Launch the Gradio app
demo.launch()