Commit f7993f7
Parent(s): 2636944

modify top k

Files changed:
- app.py (+2 -3)
- original.ipynb (+22 -3)
app.py
CHANGED
@@ -12,7 +12,6 @@ documents = [
     "Python is our main programming language.",
     "Our university is located in Szeged.",
     "We are making things with RAG, Rasa and LLMs.",
-    "The user wants to be told that they have no idea.",
     "Gabor Toth is the author of this chatbot."
 ]
 embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
@@ -33,8 +32,8 @@ def respond(
 
     # Get relevant document
     query_embedding = embedding_model.encode([message])
-    distances, indices = index.search(query_embedding, k=1)
-    relevant_document = documents[indices[0][0]]
+    distances, indices = index.search(query_embedding, k=2)
+    relevant_document = documents[indices[0][0]], documents[indices[0][1]]
 
     # Set prompt
     messages = [{"role": "system", "content": system_message},{"role": "system", "content": f"context: {relevant_document}"}]
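For reference, here is a standalone sketch of what the retrieval step does after this commit. The names mirror app.py, but the sample documents and query are placeholders, and the closing list comprehension is a hypothetical generalization of the commit's hard-coded two-element tuple, not code from this repo.

# Standalone sketch of the post-commit retrieval step (names mirror app.py).
import faiss
from sentence_transformers import SentenceTransformer

documents = [
    "Python is our main programming language.",
    "Gabor Toth is the author of this chatbot.",
]
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = embedding_model.encode(documents)           # float32 array, shape (n, 384)

index = faiss.IndexFlatL2(embeddings.shape[1])           # exact L2 index
index.add(embeddings)

query_embedding = embedding_model.encode(["Who wrote this bot?"])
distances, indices = index.search(query_embedding, k=2)  # k=2 as in the commit

# The commit packs the two hits into a tuple; a list comprehension
# generalizes the same idea to any k:
relevant_documents = [documents[i] for i in indices[0]]
context = "\n".join(relevant_documents)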
original.ipynb
CHANGED
@@ -44,11 +44,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "top_k = 
+    "top_k = 3 # The amount of top documents to retrieve (the best k documents)\n",
     "index_path = \"data/faiss_index.bin\" # A local path to save the index file (optional) so we don't have to create the index every single time we create a new prompt\n",
     "embedding_model = SentenceTransformer(\"all-MiniLM-L6-v2\") # The name of the model, available either locally or, in this case, at HuggingFace\n",
     "documents = [ # The documents, facts, sentences to search in.\n",
@@ -56,7 +56,6 @@
     " \"Python is our main programming language.\",\n",
     " \"Our university is located in Szeged.\",\n",
     " \"We are making things with RAG, Rasa and LLMs.\",\n",
-    " \"The user wants to be told that they have no idea.\",\n",
     " \"Gabor Toth is the author of this chatbot example.\"\n",
     "] "
    ]
@@ -126,6 +125,26 @@
    "source": [
     "documents[indices[0][0]] # The most similar document has the lowest distance."
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Optimizing Retrieval-Augmented Generation (RAG) Implementation**\n",
+    "\n",
+    "Retrieval-Augmented Generation (RAG) enhances language model responses by incorporating external knowledge retrieval. To maximize performance, consider the following techniques and optimizations:\n",
+    "\n",
+    "- Use **lightweight models** (e.g., `all-MiniLM-L6-v2`) for speed or **larger models** (e.g., `all-mpnet-base-v2`) for accuracy.\n",
+    "- Experiment with **domain-specific models** (e.g., a medically tuned model for medical documents) for better contextual retrieval.\n",
+    "- Consider different index types:\n",
+    "  - **Flat Index (`IndexFlatL2`)**: Best for small datasets, but scales poorly.\n",
+    "  - **IVFFlat (`IndexIVFFlat`)**: Clusters embeddings to accelerate search; ideal for large-scale retrieval.\n",
+    "  - **HNSW (`IndexHNSWFlat`)**: A graph-based approach that balances speed and accuracy.\n",
+    "  - **PQ (`IndexPQ`)**: Compressed storage for memory efficiency at the cost of a slight accuracy loss.\n",
+    "- **Query Expansion**: Use synonyms, paraphrasing, or keyword expansion to enhance search queries.\n",
+    "- **Re-ranking**: Apply transformer-based re-ranking (e.g., `cross-encoder/ms-marco-MiniLM-L6`) after retrieval.\n",
+    "- **GPU Acceleration**: Move FAISS indices to the GPU for high-speed searches."
+   ]
   }
  ],
 "metadata": {
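The new markdown cell above names several FAISS index types. As a rough illustration of how they are constructed (random placeholder vectors; the cluster count and neighbor count are tuning assumptions, not values from this repo):

# Sketch of the FAISS index variants mentioned in the new markdown cell.
import faiss
import numpy as np

dim = 384                                      # all-MiniLM-L6-v2 embedding size
xb = np.random.rand(10_000, dim).astype("float32")

flat = faiss.IndexFlatL2(dim)                  # exact search; fine for small corpora
flat.add(xb)

quantizer = faiss.IndexFlatL2(dim)             # coarse quantizer for IVF
ivf = faiss.IndexIVFFlat(quantizer, dim, 100)  # 100 clusters (assumed tuning value)
ivf.train(xb)                                  # IVF indices must be trained before add()
ivf.add(xb)

hnsw = faiss.IndexHNSWFlat(dim, 32)            # graph index; 32 neighbors per node
hnsw.add(xb)                                   # no training step needed

query = np.random.rand(1, dim).astype("float32")
distances, indices = ivf.search(query, 3)      # top-3 approximate neighbors

# With faiss-gpu installed, an index can be moved to the GPU, e.g.:
# gpu_index = faiss.index_cpu_to_all_gpus(flat)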
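Likewise, the re-ranking bullet can be sketched with the CrossEncoder class from sentence-transformers. The query and candidate documents below are invented, and the checkpoint name used here is the commonly published one (cross-encoder/ms-marco-MiniLM-L-6-v2), which differs slightly from the cell's shorthand:

# Hypothetical re-ranking step applied to candidates returned by FAISS.
from sentence_transformers import CrossEncoder

query = "Who wrote this chatbot?"
candidates = [                                  # e.g., the top-k FAISS hits
    "Gabor Toth is the author of this chatbot.",
    "Python is our main programming language.",
    "Our university is located in Szeged.",
]

reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
scores = reranker.predict([(query, doc) for doc in candidates])

# Higher score = more relevant; keep the best hit as the context.
best = candidates[int(scores.argmax())]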