Spaces:
Running
Running
File size: 1,286 Bytes
44d337e 54c5b76 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
import os
import zipfile
from huggingface_hub import hf_hub_download
from sentence_transformers import SentenceTransformer
from langchain_chroma import Chroma
import torch
import gradio as gr
class SentenceTransformerWrapper:
    """Adapter exposing a SentenceTransformer via ``embed_documents`` / ``embed_query``.

    Both methods call the wrapped model's ``encode`` and convert the result
    with ``.tolist()`` before returning it.
    """

    def __init__(self, model_name):
        """Load the SentenceTransformer identified by *model_name*."""
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, texts):
        """Encode a list of texts (with a progress bar) and return the embeddings."""
        vectors = self.model.encode(texts, show_progress_bar=True)
        return vectors.tolist()

    def embed_query(self, text):
        """Encode a single query and return its embedding."""
        vector = self.model.encode(text)
        return vector.tolist()
# Directory handed to Chroma as its persistence location.
persist_directory = "chroma_db"

# Embedding adapter backed by the "intfloat/e5-base-v2" SentenceTransformer model.
embedding_model = SentenceTransformerWrapper("intfloat/e5-base-v2")

# Vector store bound to the persistence directory; it uses the adapter above
# as its embedding function.
vector_db = Chroma(
    embedding_function=embedding_model,
    persist_directory=persist_directory,
)
def retrieve_info(query, k=5):
    """Return the chunks most similar to *query* as a formatted text report.

    Args:
        query: Text to search the vector store for.
        k: Number of chunks to retrieve. A Gradio ``Number`` input can
            deliver a float (``5.0``) or ``None`` (blank field), so the value
            is coerced to an integer here. Blank or non-numeric input falls
            back to 5, and values below 1 are raised to 1.

    Returns:
        One "Result N" section per hit, containing the document metadata and
        the first 1000 characters of its content. Empty string when the
        search returns no documents.
    """
    default_k = 5
    try:
        k = default_k if k is None else int(k)
    except (TypeError, ValueError, OverflowError):
        # Covers non-numeric values as well as NaN / infinity.
        k = default_k
    # The vector store needs a positive integer result count.
    k = max(k, 1)

    results = vector_db.similarity_search(query, k=k)
    return "".join(
        f"Result {rank}:\n Metadata: {doc.metadata}\n Content: {doc.page_content[:1000]}\n\n"
        for rank, doc in enumerate(results, start=1)
    )
# Gradio UI: a free-text query plus the number of chunks to retrieve; the
# formatted search results are shown in a single large textbox.
demo = gr.Interface(
    fn=retrieve_info,
    inputs=[
        "text",
        # precision=0 makes Gradio pass the number as an int instead of a
        # float; value=5 pre-fills the field with retrieve_info's default k.
        gr.Number(label="k (Number of chunks to retrieve)", value=5, precision=0),
    ],
    outputs=[
        gr.Textbox(label="Output from DenseRetriever", lines=25),
    ],
)