File size: 1,286 Bytes
44d337e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54c5b76
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import os
import zipfile
from huggingface_hub import hf_hub_download
from sentence_transformers import SentenceTransformer
from langchain_chroma import Chroma
import torch
import gradio as gr


class SentenceTransformerWrapper:
    """Adapter exposing a SentenceTransformer via embed_documents/embed_query.

    The loaded model is kept on ``self.model``; both methods delegate to its
    ``encode`` call and convert the result with ``.tolist()``.
    """

    def __init__(self, model_name):
        """Load the sentence-transformers model identified by ``model_name``."""
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, texts):
        """Encode a list of texts (with a progress bar) and return ``.tolist()`` of the result."""
        encoded_batch = self.model.encode(texts, show_progress_bar=True)
        return encoded_batch.tolist()

    def embed_query(self, text):
        """Encode a single query text and return ``.tolist()`` of the result."""
        encoded_query = self.model.encode(text)
        return encoded_query.tolist()
    
# Embedding backend handed to the vector store.
embedding_model = SentenceTransformerWrapper("intfloat/e5-base-v2")

# Relative path passed to Chroma as its persistence directory.
persist_directory = "chroma_db"

# Vector store handle used by the retrieval function below.
vector_db = Chroma(
    embedding_function=embedding_model,
    persist_directory=persist_directory,
)

def retrieve_info(query, k=5):
    """Return the ``k`` stored chunks most similar to ``query`` as display text.

    Args:
        query: Text to search the vector store for.
        k: Number of chunks to retrieve. It may arrive as a float, or as
            ``None`` when a numeric form field is left empty, so it is
            normalised to an ``int`` here; ``None`` falls back to 5.

    Returns:
        One "Result N" section per hit, showing the document's metadata and
        the first 1000 characters of its content. An empty string when ``k``
        is below 1 or when the search returns nothing.
    """
    k = 5 if k is None else int(k)
    if k < 1:
        # Nothing was requested, so skip the store lookup entirely.
        return ""

    results = vector_db.similarity_search(query, k=k)
    return "".join(
        f"Result {i}:\n Metadata: {doc.metadata}\n Content: {doc.page_content[:1000]}\n\n"
        for i, doc in enumerate(results, start=1)
    )


# Web UI: a free-text query plus the number of chunks to fetch.
demo = gr.Interface(
    fn=retrieve_info,
    inputs=[
        "text",
        # precision=0 makes Gradio pass an int to the callback; value=5
        # matches the default k of retrieve_info so the field is never empty.
        gr.Number(
            label="k (Number of chunks to retrieve)",
            value=5,
            precision=0,
            minimum=1,
        ),
    ],
    outputs=[
        gr.Textbox(label="Output from DenseRetriever", lines=25),
    ],
)

# Start the web server only when run as a script, so importing this module
# keeps the same side effects as before.
if __name__ == "__main__":
    demo.launch()