Spaces:
Sleeping
Sleeping
Commit
·
195e6d9
1
Parent(s):
9baa2cb
QDRANT ADDITION
Browse files
- Dockerfile +4 -1
- app.py +33 -4
- requirements.txt +2 -1
Dockerfile
CHANGED
@@ -1,4 +1,6 @@
|
|
1 |
FROM python:3.9
|
|
|
|
|
2 |
RUN useradd -m -u 1000 user
|
3 |
USER user
|
4 |
ENV HOME=/home/user \
|
@@ -8,4 +10,5 @@ COPY --chown=user . $HOME/app
|
|
8 |
COPY ./requirements.txt ~/app/requirements.txt
|
9 |
RUN pip install -r requirements.txt
|
10 |
COPY . .
|
11 |
-
|
|
|
|
1 |
FROM python:3.9
|
2 |
+
RUN apt-get update && apt-get install -y curl
|
3 |
+
RUN curl -L https://github.com/qdrant/qdrant/releases/latest/download/qdrant-x86_64-unknown-linux-gnu.tar.gz | tar xvz -C /usr/local/bin
|
4 |
RUN useradd -m -u 1000 user
|
5 |
USER user
|
6 |
ENV HOME=/home/user \
|
|
|
10 |
COPY ./requirements.txt ~/app/requirements.txt
|
11 |
RUN pip install -r requirements.txt
|
12 |
COPY . .
|
13 |
+
RUN echo '#!/bin/bash\nqdrant &\nchainlit run app.py --port 7860' > start.sh && chmod +x start.sh
|
14 |
+
CMD ["./start.sh"]
|
app.py
CHANGED
@@ -3,19 +3,48 @@ from typing import List
|
|
3 |
from chainlit.types import AskFileResponse
|
4 |
from aimakerspace.text_utils import CharacterTextSplitter, TextFileLoader
|
5 |
from aimakerspace.openai_utils.prompts import UserRolePrompt, SystemRolePrompt
|
6 |
-
from aimakerspace.vectordatabase import VectorDatabase
|
7 |
from aimakerspace.openai_utils.chatmodel import ChatOpenAI
|
|
|
8 |
import chainlit as cl
|
9 |
from PyPDF2 import PdfReader
|
|
|
|
|
10 |
|
11 |
system_template = "Use the following context to answer a users question. If you cannot find the answer in the context, say you don't know the answer."
|
12 |
system_role_prompt = SystemRolePrompt(system_template)
|
13 |
-
|
14 |
user_prompt_template = "Context:\n{context}\n\nQuestion:\n{question}"
|
15 |
user_role_prompt = UserRolePrompt(user_prompt_template)
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
class RetrievalAugmentedQAPipeline:
|
18 |
-
def __init__(self, llm: ChatOpenAI(), vector_db_retriever:
|
19 |
self.llm = llm
|
20 |
self.vector_db_retriever = vector_db_retriever
|
21 |
|
@@ -69,7 +98,7 @@ async def on_chat_start():
|
|
69 |
texts = process_file(file)
|
70 |
print(f"Processing {len(texts)} text chunks")
|
71 |
|
72 |
-
vector_db =
|
73 |
vector_db = await vector_db.abuild_from_list(texts)
|
74 |
|
75 |
chat_openai = ChatOpenAI()
|
|
|
3 |
from chainlit.types import AskFileResponse
|
4 |
from aimakerspace.text_utils import CharacterTextSplitter, TextFileLoader
|
5 |
from aimakerspace.openai_utils.prompts import UserRolePrompt, SystemRolePrompt
|
|
|
6 |
from aimakerspace.openai_utils.chatmodel import ChatOpenAI
|
7 |
+
from aimakerspace.openai_utils.embedding import EmbeddingModel
|
8 |
import chainlit as cl
|
9 |
from PyPDF2 import PdfReader
|
10 |
+
from qdrant_client import QdrantClient
|
11 |
+
from qdrant_client.http import models
|
12 |
|
13 |
system_template = "Use the following context to answer a users question. If you cannot find the answer in the context, say you don't know the answer."
|
14 |
system_role_prompt = SystemRolePrompt(system_template)
|
|
|
15 |
user_prompt_template = "Context:\n{context}\n\nQuestion:\n{question}"
|
16 |
user_role_prompt = UserRolePrompt(user_prompt_template)
|
17 |
|
18 |
+
class QdrantVectorStore:
    """In-memory Qdrant-backed vector store.

    Mirrors the minimal vector-database interface the QA pipeline expects:
    ``abuild_from_list`` to index a list of texts and ``search_by_text``
    to retrieve the closest matches.
    """

    def __init__(self, collection_name="my_collection"):
        # ":memory:" runs qdrant-client's local mode in-process; nothing
        # is persisted and no server connection is opened.
        self.client = QdrantClient(":memory:")
        self.collection_name = collection_name
        self.embedding_model = EmbeddingModel()

    async def abuild_from_list(self, texts: List[str]):
        """Embed every text and (re)build the collection from scratch.

        Returns ``self`` so callers can chain:
        ``db = await QdrantVectorStore().abuild_from_list(texts)``.
        """
        # size=1536 matches OpenAI's ada-002 embedding width —
        # NOTE(review): confirm against EmbeddingModel's actual model.
        self.client.recreate_collection(
            collection_name=self.collection_name,
            vectors_config=models.VectorParams(size=1536, distance=models.Distance.COSINE),
        )
        # Fix: collect all points and upsert once, instead of one upsert
        # call per text — a single bulk write replaces len(texts) round trips.
        points = []
        for i, text in enumerate(texts):
            vector = await self.embedding_model.aembed_query(text)
            points.append(
                models.PointStruct(id=i, vector=vector, payload={"text": text})
            )
        if points:  # skip the upsert entirely for an empty input list
            self.client.upsert(
                collection_name=self.collection_name,
                points=points,
            )
        return self

    def search_by_text(self, query: str, k: int = 4):
        """Return the top-``k`` matches for *query* as ``(text, score)`` tuples."""
        vector = self.embedding_model.embed_query(query)
        results = self.client.search(
            collection_name=self.collection_name,
            query_vector=vector,
            limit=k,
        )
        return [(hit.payload["text"], hit.score) for hit in results]
|
45 |
+
|
46 |
class RetrievalAugmentedQAPipeline:
|
47 |
+
def __init__(self, llm: ChatOpenAI(), vector_db_retriever: QdrantVectorStore) -> None:
|
48 |
self.llm = llm
|
49 |
self.vector_db_retriever = vector_db_retriever
|
50 |
|
|
|
98 |
texts = process_file(file)
|
99 |
print(f"Processing {len(texts)} text chunks")
|
100 |
|
101 |
+
vector_db = QdrantVectorStore()
|
102 |
vector_db = await vector_db.abuild_from_list(texts)
|
103 |
|
104 |
chat_openai = ChatOpenAI()
|
requirements.txt
CHANGED
@@ -3,4 +3,5 @@ chainlit==0.7.700
|
|
3 |
openai
|
4 |
chainlit
|
5 |
PyPDF2
|
6 |
-
openai
|
|
|
|
3 |
openai
|
4 |
chainlit
|
5 |
PyPDF2
|
6 |
+
openai
|
7 |
+
qdrant-client
|