Spaces:
Running
Running
update embedding
Browse files
- Dockerfile +4 -4
- app/rag.py +16 -4
- start_service.sh +8 -8
Dockerfile
CHANGED
@@ -16,13 +16,13 @@ COPY ./start_service.sh /code/start_service.sh
|
|
16 |
#
|
17 |
COPY ./app /code/app
|
18 |
|
19 |
-
RUN useradd -m docker && echo "docker:docker" | chpasswd && adduser docker sudo
|
20 |
|
21 |
-
RUN curl -fsSL https://ollama.com/install.sh | sh
|
22 |
|
23 |
-
USER docker
|
24 |
|
25 |
-
RUN nohup ollama serve & sleep 5
|
26 |
|
27 |
#
|
28 |
# RUN chmod +x /code/start_service.sh
|
|
|
16 |
#
|
17 |
COPY ./app /code/app
|
18 |
|
19 |
+
# RUN useradd -m docker && echo "docker:docker" | chpasswd && adduser docker sudo
|
20 |
|
21 |
+
# RUN curl -fsSL https://ollama.com/install.sh | sh
|
22 |
|
23 |
+
# USER docker
|
24 |
|
25 |
+
# RUN nohup ollama serve & sleep 5
|
26 |
|
27 |
#
|
28 |
# RUN chmod +x /code/start_service.sh
|
app/rag.py
CHANGED
@@ -1,3 +1,6 @@
|
|
|
|
|
|
|
|
1 |
from llama_index.core import (
|
2 |
SimpleDirectoryReader,
|
3 |
# VectorStoreIndex,
|
@@ -16,12 +19,14 @@ from llama_index.core.vector_stores import VectorStoreQuery
|
|
16 |
from llama_index.core.indices.vector_store.base import VectorStoreIndex
|
17 |
from llama_index.vector_stores.qdrant import QdrantVectorStore
|
18 |
from qdrant_client import QdrantClient
|
19 |
-
import logging
|
20 |
|
21 |
from llama_index.llms.llama_cpp import LlamaCPP
|
22 |
from llama_index.embeddings.fastembed import FastEmbedEmbedding
|
23 |
|
24 |
|
|
|
|
|
|
|
25 |
class ChatPDF:
|
26 |
logging.basicConfig(level=logging.INFO)
|
27 |
logger = logging.getLogger(__name__)
|
@@ -56,11 +61,18 @@ class ChatPDF:
|
|
56 |
|
57 |
self.logger.info("initializing the vector store related objects")
|
58 |
# client = QdrantClient(host="localhost", port=6333)
|
|
|
59 |
client = QdrantClient(":memory:")
|
60 |
-
self.vector_store = QdrantVectorStore(
|
|
|
|
|
|
|
|
|
61 |
|
62 |
self.logger.info("initializing the FastEmbedEmbedding")
|
63 |
-
self.embed_model = FastEmbedEmbedding(
|
|
|
|
|
64 |
|
65 |
llm = LlamaCPP(
|
66 |
model_url=self.model_url,
|
@@ -124,7 +136,7 @@ class ChatPDF:
|
|
124 |
retriever = VectorIndexRetriever(
|
125 |
index=index,
|
126 |
similarity_top_k=6,
|
127 |
-
vector_store_query_mode="hybrid"
|
128 |
)
|
129 |
|
130 |
self.logger.info("configure response synthesizer")
|
|
|
1 |
+
import os
|
2 |
+
import logging
|
3 |
+
|
4 |
from llama_index.core import (
|
5 |
SimpleDirectoryReader,
|
6 |
# VectorStoreIndex,
|
|
|
19 |
from llama_index.core.indices.vector_store.base import VectorStoreIndex
|
20 |
from llama_index.vector_stores.qdrant import QdrantVectorStore
|
21 |
from qdrant_client import QdrantClient
|
|
|
22 |
|
23 |
from llama_index.llms.llama_cpp import LlamaCPP
|
24 |
from llama_index.embeddings.fastembed import FastEmbedEmbedding
|
25 |
|
26 |
|
27 |
+
QDRANT_API_URL = os.getenv('QDRANT_API_URL')
|
28 |
+
QDRANT_API_KEY = os.getenv('QDRANT_API_KEY')
|
29 |
+
|
30 |
class ChatPDF:
|
31 |
logging.basicConfig(level=logging.INFO)
|
32 |
logger = logging.getLogger(__name__)
|
|
|
61 |
|
62 |
self.logger.info("initializing the vector store related objects")
|
63 |
# client = QdrantClient(host="localhost", port=6333)
|
64 |
+
# client = QdrantClient(url=QDRANT_API_URL, api_key=QDRANT_API_KEY)
|
65 |
client = QdrantClient(":memory:")
|
66 |
+
self.vector_store = QdrantVectorStore(
|
67 |
+
client=client,
|
68 |
+
collection_name="rag_documents",
|
69 |
+
# enable_hybrid=True
|
70 |
+
)
|
71 |
|
72 |
self.logger.info("initializing the FastEmbedEmbedding")
|
73 |
+
self.embed_model = FastEmbedEmbedding(
|
74 |
+
# model_name="BAAI/bge-small-en"
|
75 |
+
)
|
76 |
|
77 |
llm = LlamaCPP(
|
78 |
model_url=self.model_url,
|
|
|
136 |
retriever = VectorIndexRetriever(
|
137 |
index=index,
|
138 |
similarity_top_k=6,
|
139 |
+
# vector_store_query_mode="hybrid"
|
140 |
)
|
141 |
|
142 |
self.logger.info("configure response synthesizer")
|
start_service.sh
CHANGED
@@ -1,16 +1,16 @@
|
|
1 |
#!/bin/sh
|
2 |
|
3 |
-
# Start Ollama in the background
|
4 |
-
ollama serve &
|
5 |
|
6 |
-
# Wait for Ollama to start
|
7 |
-
sleep 5
|
8 |
|
9 |
-
#
|
10 |
-
ollama pull mxbai-embed-large
|
11 |
|
12 |
-
# Pull and run <YOUR_MODEL_NAME>
|
13 |
-
ollama pull qwen:1.8b
|
14 |
|
15 |
#
|
16 |
fastapi run /code/app/main.py --port 7860
|
|
|
1 |
#!/bin/sh
|
2 |
|
3 |
+
# # Start Ollama in the background
|
4 |
+
# ollama serve &
|
5 |
|
6 |
+
# # Wait for Ollama to start
|
7 |
+
# sleep 5
|
8 |
|
9 |
+
# #
|
10 |
+
# ollama pull mxbai-embed-large
|
11 |
|
12 |
+
# # Pull and run <YOUR_MODEL_NAME>
|
13 |
+
# ollama pull qwen:1.8b
|
14 |
|
15 |
#
|
16 |
fastapi run /code/app/main.py --port 7860
|