Katanna941 committed on
Commit
195e6d9
·
1 Parent(s): 9baa2cb

QDRANT ADDITION

Browse files
Files changed (3) hide show
  1. Dockerfile +4 -1
  2. app.py +33 -4
  3. requirements.txt +2 -1
Dockerfile CHANGED
@@ -1,4 +1,6 @@
1
  FROM python:3.9
 
 
2
  RUN useradd -m -u 1000 user
3
  USER user
4
  ENV HOME=/home/user \
@@ -8,4 +10,5 @@ COPY --chown=user . $HOME/app
8
  COPY ./requirements.txt ~/app/requirements.txt
9
  RUN pip install -r requirements.txt
10
  COPY . .
11
- CMD ["chainlit", "run", "app.py", "--port", "7860"]
 
 
1
  FROM python:3.9
2
+ RUN apt-get update && apt-get install -y curl
3
+ RUN curl -L https://github.com/qdrant/qdrant/releases/latest/download/qdrant-x86_64-unknown-linux-gnu.tar.gz | tar xvz -C /usr/local/bin
4
  RUN useradd -m -u 1000 user
5
  USER user
6
  ENV HOME=/home/user \
 
10
  COPY ./requirements.txt ~/app/requirements.txt
11
  RUN pip install -r requirements.txt
12
  COPY . .
13
# Launch Qdrant in the background, then run Chainlit in the foreground so it
# becomes PID 1's child and keeps the container alive.
# printf is used instead of echo: echo's handling of '\n' escapes is
# shell-dependent (bash requires -e; dash expands them unconditionally),
# while printf interprets them portably.
# NOTE(review): there is no readiness wait — chainlit may start before qdrant
# is accepting connections; confirm the client retries or add a short wait.
RUN printf '#!/bin/bash\nqdrant &\nchainlit run app.py --port 7860\n' > start.sh && chmod +x start.sh
CMD ["./start.sh"]
app.py CHANGED
@@ -3,19 +3,48 @@ from typing import List
3
  from chainlit.types import AskFileResponse
4
  from aimakerspace.text_utils import CharacterTextSplitter, TextFileLoader
5
  from aimakerspace.openai_utils.prompts import UserRolePrompt, SystemRolePrompt
6
- from aimakerspace.vectordatabase import VectorDatabase
7
  from aimakerspace.openai_utils.chatmodel import ChatOpenAI
 
8
  import chainlit as cl
9
  from PyPDF2 import PdfReader
 
 
10
 
11
  system_template = "Use the following context to answer a users question. If you cannot find the answer in the context, say you don't know the answer."
12
  system_role_prompt = SystemRolePrompt(system_template)
13
-
14
  user_prompt_template = "Context:\n{context}\n\nQuestion:\n{question}"
15
  user_role_prompt = UserRolePrompt(user_prompt_template)
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  class RetrievalAugmentedQAPipeline:
18
- def __init__(self, llm: ChatOpenAI(), vector_db_retriever: VectorDatabase) -> None:
19
  self.llm = llm
20
  self.vector_db_retriever = vector_db_retriever
21
 
@@ -69,7 +98,7 @@ async def on_chat_start():
69
  texts = process_file(file)
70
  print(f"Processing {len(texts)} text chunks")
71
 
72
- vector_db = VectorDatabase()
73
  vector_db = await vector_db.abuild_from_list(texts)
74
 
75
  chat_openai = ChatOpenAI()
 
3
  from chainlit.types import AskFileResponse
4
  from aimakerspace.text_utils import CharacterTextSplitter, TextFileLoader
5
  from aimakerspace.openai_utils.prompts import UserRolePrompt, SystemRolePrompt
 
6
  from aimakerspace.openai_utils.chatmodel import ChatOpenAI
7
+ from aimakerspace.openai_utils.embedding import EmbeddingModel
8
  import chainlit as cl
9
  from PyPDF2 import PdfReader
10
+ from qdrant_client import QdrantClient
11
+ from qdrant_client.http import models
12
 
13
  system_template = "Use the following context to answer a users question. If you cannot find the answer in the context, say you don't know the answer."
14
  system_role_prompt = SystemRolePrompt(system_template)
 
15
  user_prompt_template = "Context:\n{context}\n\nQuestion:\n{question}"
16
  user_role_prompt = UserRolePrompt(user_prompt_template)
17
 
18
class QdrantVectorStore:
    """In-process Qdrant-backed vector store.

    Mirrors the minimal interface the pipeline expects:
    ``abuild_from_list()`` to embed and index texts, ``search_by_text()``
    to retrieve the most similar chunks.
    """

    # OpenAI's text-embedding-ada-002 vectors are 1536-dimensional;
    # presumably EmbeddingModel wraps that model — TODO confirm.
    _VECTOR_SIZE = 1536

    def __init__(self, collection_name: str = "my_collection") -> None:
        # ":memory:" runs Qdrant fully in-process — no server required.
        self.client = QdrantClient(":memory:")
        self.collection_name = collection_name
        self.embedding_model = EmbeddingModel()

    async def abuild_from_list(self, texts: List[str]) -> "QdrantVectorStore":
        """Embed each text chunk and index it; return ``self`` for chaining."""
        self.client.recreate_collection(
            collection_name=self.collection_name,
            vectors_config=models.VectorParams(
                size=self._VECTOR_SIZE, distance=models.Distance.COSINE
            ),
        )
        # Collect all points and upsert once, instead of one upsert call per
        # chunk as the original did (N round trips for N texts).
        points = []
        for idx, text in enumerate(texts):
            vector = await self.embedding_model.aembed_query(text)
            points.append(
                models.PointStruct(id=idx, vector=vector, payload={"text": text})
            )
        if points:  # skip the round trip entirely for empty input
            self.client.upsert(
                collection_name=self.collection_name,
                points=points,
            )
        return self

    def search_by_text(self, query: str, k: int = 4):
        """Return the top-``k`` ``(text, score)`` pairs most similar to ``query``."""
        vector = self.embedding_model.embed_query(query)
        results = self.client.search(
            collection_name=self.collection_name,
            query_vector=vector,
            limit=k,
        )
        return [(hit.payload["text"], hit.score) for hit in results]
46
  class RetrievalAugmentedQAPipeline:
47
def __init__(self, llm: ChatOpenAI, vector_db_retriever: QdrantVectorStore) -> None:
    """Store the chat model and retriever used by the QA pipeline.

    Bug fix: the original annotation was ``ChatOpenAI()`` — a call expression
    evaluated at definition time, instantiating the client just to annotate the
    parameter. The class itself is the correct annotation.
    """
    self.llm = llm
    self.vector_db_retriever = vector_db_retriever
50
 
 
98
  texts = process_file(file)
99
  print(f"Processing {len(texts)} text chunks")
100
 
101
+ vector_db = QdrantVectorStore()
102
  vector_db = await vector_db.abuild_from_list(texts)
103
 
104
  chat_openai = ChatOpenAI()
requirements.txt CHANGED
@@ -3,4 +3,5 @@ chainlit==0.7.700
3
  openai
4
  chainlit
5
  PyPDF2
6
- openai
 
 
3
  openai
4
  chainlit
5
  PyPDF2
6
+ openai
7
+ qdrant-client