# Hugging Face Spaces status: Running
import os | |
import zipfile | |
from huggingface_hub import hf_hub_download | |
from sentence_transformers import SentenceTransformer | |
from langchain_chroma import Chroma | |
import torch | |
import gradio as gr | |
class SentenceTransformerWrapper:
    """Adapter exposing a SentenceTransformer through embed_documents/embed_query.

    NOTE(review): the E5 model card recommends "query: " / "passage: " input
    prefixes; no prefix is added here. Confirm how the persisted index was
    built before changing that.
    """

    def __init__(self, model_name):
        """Load the sentence-transformers model identified by ``model_name``."""
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, texts):
        """Encode a list of texts and return the embeddings via ``.tolist()``."""
        # The progress bar is enabled for batch encoding only.
        doc_vectors = self.model.encode(texts, show_progress_bar=True)
        return doc_vectors.tolist()

    def embed_query(self, text):
        """Encode a single query string and return its embedding via ``.tolist()``."""
        query_vector = self.model.encode(text)
        return query_vector.tolist()
# Vector store setup: a Chroma collection read from the local "chroma_db"
# directory, using the E5 sentence-transformer wrapper as embedding function.
persist_directory = "chroma_db"
embedding_model = SentenceTransformerWrapper("intfloat/e5-base-v2")
vector_db = Chroma(embedding_function=embedding_model, persist_directory=persist_directory)
def retrieve_info(query, k=5):
    """Return the ``k`` chunks most similar to ``query`` as display text.

    Args:
        query: Free-text search string passed to the vector store.
        k: Number of chunks to retrieve. A UI number input may deliver this
            as a float, or as ``None`` when left blank, so it is normalised
            to an ``int`` here; ``None`` falls back to 5.

    Returns:
        One "Result N" section per hit, showing its metadata and at most the
        first 1000 characters of its content. An empty string is returned
        when ``k`` is below 1 or when the search yields no results.
    """
    # The result count must be an integer; coerce whatever the UI handed over.
    top_k = 5 if k is None else int(k)
    if top_k < 1:
        return ""

    results = vector_db.similarity_search(query, k=top_k)
    return "".join(
        f"Result {rank}:\n Metadata: {doc.metadata}\n Content: {doc.page_content[:1000]}\n\n"
        for rank, doc in enumerate(results, start=1)
    )
# Gradio UI: a text query plus a chunk count go to retrieve_info, and the
# formatted results are shown in a single large textbox.
demo = gr.Interface(
    fn=retrieve_info,
    inputs=[
        "text",
        # value=5 mirrors retrieve_info's default; precision=0 makes the
        # component deliver an int instead of a float.
        gr.Number(label="k (Number of chunks to retrieve)", value=5, precision=0),
    ],
    outputs=[
        gr.Textbox(label="Output from DenseRetriever", lines=25),
    ],
)