Spaces:
Sleeping
Sleeping
Commit
·
195e6d9
1
Parent(s):
9baa2cb
QDRANT ADDITION
Browse files
- Dockerfile +4 -1
- app.py +33 -4
- requirements.txt +2 -1
Dockerfile
CHANGED
@@ -1,4 +1,6 @@
|
|
1 |
FROM python:3.9
|
|
|
|
|
2 |
RUN useradd -m -u 1000 user
|
3 |
USER user
|
4 |
ENV HOME=/home/user \
|
@@ -8,4 +10,5 @@ COPY --chown=user . $HOME/app
|
|
8 |
COPY ./requirements.txt ~/app/requirements.txt
|
9 |
RUN pip install -r requirements.txt
|
10 |
COPY . .
|
11 |
-
|
|
|
|
1 |
FROM python:3.9
|
2 |
+
RUN apt-get update && apt-get install -y curl
|
3 |
+
RUN curl -L https://github.com/qdrant/qdrant/releases/latest/download/qdrant-x86_64-unknown-linux-gnu.tar.gz | tar xvz -C /usr/local/bin
|
4 |
RUN useradd -m -u 1000 user
|
5 |
USER user
|
6 |
ENV HOME=/home/user \
|
|
|
10 |
COPY ./requirements.txt ~/app/requirements.txt
|
11 |
RUN pip install -r requirements.txt
|
12 |
COPY . .
|
13 |
+
RUN echo '#!/bin/bash\nqdrant &\nchainlit run app.py --port 7860' > start.sh && chmod +x start.sh
|
14 |
+
CMD ["./start.sh"]
|
app.py
CHANGED
@@ -3,19 +3,48 @@ from typing import List
|
|
3 |
from chainlit.types import AskFileResponse
|
4 |
from aimakerspace.text_utils import CharacterTextSplitter, TextFileLoader
|
5 |
from aimakerspace.openai_utils.prompts import UserRolePrompt, SystemRolePrompt
|
6 |
-
from aimakerspace.vectordatabase import VectorDatabase
|
7 |
from aimakerspace.openai_utils.chatmodel import ChatOpenAI
|
|
|
8 |
import chainlit as cl
|
9 |
from PyPDF2 import PdfReader
|
|
|
|
|
10 |
|
11 |
system_template = "Use the following context to answer a users question. If you cannot find the answer in the context, say you don't know the answer."
|
12 |
system_role_prompt = SystemRolePrompt(system_template)
|
13 |
-
|
14 |
user_prompt_template = "Context:\n{context}\n\nQuestion:\n{question}"
|
15 |
user_role_prompt = UserRolePrompt(user_prompt_template)
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
class RetrievalAugmentedQAPipeline:
|
18 |
-
def __init__(self, llm: ChatOpenAI(), vector_db_retriever:
|
19 |
self.llm = llm
|
20 |
self.vector_db_retriever = vector_db_retriever
|
21 |
|
@@ -69,7 +98,7 @@ async def on_chat_start():
|
|
69 |
texts = process_file(file)
|
70 |
print(f"Processing {len(texts)} text chunks")
|
71 |
|
72 |
-
vector_db =
|
73 |
vector_db = await vector_db.abuild_from_list(texts)
|
74 |
|
75 |
chat_openai = ChatOpenAI()
|
|
|
3 |
from chainlit.types import AskFileResponse
|
4 |
from aimakerspace.text_utils import CharacterTextSplitter, TextFileLoader
|
5 |
from aimakerspace.openai_utils.prompts import UserRolePrompt, SystemRolePrompt
|
|
|
6 |
from aimakerspace.openai_utils.chatmodel import ChatOpenAI
|
7 |
+
from aimakerspace.openai_utils.embedding import EmbeddingModel
|
8 |
import chainlit as cl
|
9 |
from PyPDF2 import PdfReader
|
10 |
+
from qdrant_client import QdrantClient
|
11 |
+
from qdrant_client.http import models
|
12 |
|
13 |
system_template = "Use the following context to answer a users question. If you cannot find the answer in the context, say you don't know the answer."
|
14 |
system_role_prompt = SystemRolePrompt(system_template)
|
|
|
15 |
user_prompt_template = "Context:\n{context}\n\nQuestion:\n{question}"
|
16 |
user_role_prompt = UserRolePrompt(user_prompt_template)
|
17 |
|
18 |
+
class QdrantVectorStore:
    """In-memory Qdrant-backed vector store.

    Mirrors the minimal vector-database interface the QA pipeline expects:
    ``abuild_from_list`` to index a list of texts and ``search_by_text``
    to retrieve the closest matches.
    """

    def __init__(self, collection_name="my_collection"):
        # ":memory:" runs qdrant-client's local mode in-process; nothing
        # is persisted and no server connection is opened.
        self.client = QdrantClient(":memory:")
        self.collection_name = collection_name
        self.embedding_model = EmbeddingModel()

    async def abuild_from_list(self, texts: List[str]):
        """Embed every text and (re)build the collection from scratch.

        Returns ``self`` so callers can chain:
        ``db = await QdrantVectorStore().abuild_from_list(texts)``.
        """
        # size=1536 matches OpenAI's ada-002 embedding width —
        # NOTE(review): confirm against EmbeddingModel's actual model.
        self.client.recreate_collection(
            collection_name=self.collection_name,
            vectors_config=models.VectorParams(size=1536, distance=models.Distance.COSINE),
        )
        # Fix: collect all points and upsert once, instead of one upsert
        # call per text — a single bulk write replaces len(texts) round trips.
        points = []
        for i, text in enumerate(texts):
            vector = await self.embedding_model.aembed_query(text)
            points.append(
                models.PointStruct(id=i, vector=vector, payload={"text": text})
            )
        if points:  # skip the upsert entirely for an empty input list
            self.client.upsert(
                collection_name=self.collection_name,
                points=points,
            )
        return self

    def search_by_text(self, query: str, k: int = 4):
        """Return the top-``k`` matches for *query* as ``(text, score)`` tuples."""
        vector = self.embedding_model.embed_query(query)
        results = self.client.search(
            collection_name=self.collection_name,
            query_vector=vector,
            limit=k,
        )
        return [(hit.payload["text"], hit.score) for hit in results]
|
45 |
+
|
46 |
class RetrievalAugmentedQAPipeline:
|
47 |
+
def __init__(self, llm: ChatOpenAI(), vector_db_retriever: QdrantVectorStore) -> None:
|
48 |
self.llm = llm
|
49 |
self.vector_db_retriever = vector_db_retriever
|
50 |
|
|
|
98 |
texts = process_file(file)
|
99 |
print(f"Processing {len(texts)} text chunks")
|
100 |
|
101 |
+
vector_db = QdrantVectorStore()
|
102 |
vector_db = await vector_db.abuild_from_list(texts)
|
103 |
|
104 |
chat_openai = ChatOpenAI()
|
requirements.txt
CHANGED
@@ -3,4 +3,5 @@ chainlit==0.7.700
|
|
3 |
openai
|
4 |
chainlit
|
5 |
PyPDF2
|
6 |
-
openai
|
|
|
|
3 |
openai
|
4 |
chainlit
|
5 |
PyPDF2
|
6 |
+
openai
|
7 |
+
qdrant-client
|