Spaces:
Sleeping
Sleeping
dgutierrez
committed on
Commit
•
2b32d34
1
Parent(s):
6983abc
added qdrant
Browse files
- app.py +25 -4
- requirements.txt +2 -1
app.py
CHANGED
@@ -12,6 +12,8 @@ from aimakerspace.vectordatabase import VectorDatabase
|
|
12 |
from aimakerspace.openai_utils.chatmodel import ChatOpenAI
|
13 |
import chainlit as cl
|
14 |
import fitz # PyMuPDF for PDF reading
|
|
|
|
|
15 |
|
16 |
system_template = """\
|
17 |
Use the following context to answer a user's question. If you cannot find the answer in the context, say you don't know the answer."""
|
@@ -27,7 +29,7 @@ Question:
|
|
27 |
user_role_prompt = UserRolePrompt(user_prompt_template)
|
28 |
|
29 |
class RetrievalAugmentedQAPipeline:
|
30 |
-
def __init__(self, llm: ChatOpenAI(), vector_db_retriever
|
31 |
self.llm = llm
|
32 |
self.vector_db_retriever = vector_db_retriever
|
33 |
|
@@ -85,9 +87,29 @@ def process_text_file(file: AskFileResponse):
|
|
85 |
texts = text_splitter.split_texts(documents)
|
86 |
return texts
|
87 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
|
89 |
@cl.on_chat_start
|
90 |
async def on_chat_start():
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
files = None
|
92 |
|
93 |
# Wait for the user to upload a file
|
@@ -111,9 +133,8 @@ async def on_chat_start():
|
|
111 |
|
112 |
print(f"Processing {len(texts)} text chunks")
|
113 |
|
114 |
-
#
|
115 |
-
vector_db =
|
116 |
-
vector_db = await vector_db.abuild_from_list(texts)
|
117 |
|
118 |
chat_openai = ChatOpenAI()
|
119 |
|
|
|
12 |
from aimakerspace.openai_utils.chatmodel import ChatOpenAI
|
13 |
import chainlit as cl
|
14 |
import fitz # PyMuPDF for PDF reading
|
15 |
+
from qdrant_client import QdrantClient
|
16 |
+
from qdrant_client.http.models import PointStruct, VectorParams, Distance
|
17 |
|
18 |
system_template = """\
|
19 |
Use the following context to answer a user's question. If you cannot find the answer in the context, say you don't know the answer."""
|
|
|
29 |
user_role_prompt = UserRolePrompt(user_prompt_template)
|
30 |
|
31 |
class RetrievalAugmentedQAPipeline:
|
32 |
+
def __init__(self, llm: ChatOpenAI(), vector_db_retriever) -> None:
|
33 |
self.llm = llm
|
34 |
self.vector_db_retriever = vector_db_retriever
|
35 |
|
|
|
87 |
texts = text_splitter.split_texts(documents)
|
88 |
return texts
|
89 |
|
90 |
+
async def initialize_vector_db(choice, texts):
    """Build and return a vector store for *texts* based on the user's choice.

    Args:
        choice: Either "current" (the in-process VectorDatabase) or "qdrant".
        texts: List of text chunks to index.

    Returns:
        A populated ``VectorDatabase`` when choice == "current", or a
        ``QdrantClient`` holding the upserted points when choice == "qdrant".

    Raises:
        ValueError: If *choice* is not one of the recognized options.
    """
    if choice == "current":
        vector_db = VectorDatabase()
        vector_db = await vector_db.abuild_from_list(texts)
        return vector_db
    elif choice == "qdrant":
        # In-memory Qdrant instance for demonstration only — the index is
        # lost when the process exits.
        client = QdrantClient(":memory:")
        client.recreate_collection(
            collection_name="my_collection",
            vectors_config=VectorParams(size=768, distance=Distance.COSINE)
        )
        # NOTE(review): these are placeholder zero vectors, so cosine-similarity
        # search over this collection is meaningless until real embeddings are
        # computed for each chunk. TODO: embed `texts` before upserting.
        points = [PointStruct(id=i, vector=[0.0] * 768, payload={"text": text}) for i, text in enumerate(texts)]
        client.upsert(collection_name="my_collection", points=points)
        return client
    # Previously an unknown choice fell through and silently returned None,
    # which would only fail later at the call site; fail loudly here instead.
    raise ValueError(f"Unknown vector database choice: {choice!r}")
|
104 |
|
105 |
@cl.on_chat_start
|
106 |
async def on_chat_start():
|
107 |
+
# Prompt the user to select the vector database
|
108 |
+
user_choice = await cl.AskSelectMessage(
|
109 |
+
content="Which vector database would you like to use?",
|
110 |
+
options=["current", "qdrant"],
|
111 |
+
).send()
|
112 |
+
|
113 |
files = None
|
114 |
|
115 |
# Wait for the user to upload a file
|
|
|
133 |
|
134 |
print(f"Processing {len(texts)} text chunks")
|
135 |
|
136 |
+
# Initialize the selected vector database
|
137 |
+
vector_db = await initialize_vector_db(user_choice, texts)
|
|
|
138 |
|
139 |
chat_openai = ChatOpenAI()
|
140 |
|
requirements.txt
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
numpy
|
2 |
chainlit==0.7.700
|
3 |
openai
|
4 |
-
pymupdf
|
|
|
|
1 |
numpy
|
2 |
chainlit==0.7.700
|
3 |
openai
|
4 |
+
pymupdf
|
5 |
+
qdrant-client
|