GaborToth2 committed
Commit f7993f7 · Parent: 2636944

modify top k

Files changed (2):
  1. app.py +2 -3
  2. original.ipynb +22 -3
app.py CHANGED
@@ -12,7 +12,6 @@ documents = [
     "Python is our main programming language.",
     "Our university is located in Szeged.",
     "We are making things with RAG, Rasa and LLMs.",
-    "The user wants to be told that they have no idea.",
     "Gabor Toth is the author of this chatbot."
 ]
 embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
@@ -33,8 +32,8 @@ def respond(
 
     # Get relevant document
     query_embedding = embedding_model.encode([message])
-    distances, indices = index.search(query_embedding, k=1)
-    relevant_document = documents[indices[0][0]]
+    distances, indices = index.search(query_embedding, k=2)
+    relevant_document = documents[indices[0][0]], documents[indices[0][1]]
 
     # Set prompt
     messages = [{"role": "system", "content": system_message},{"role": "system", "content": f"context: {relevant_document}"}]
 
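Note that the new assignment builds a two-element Python tuple, so the f-string in the prompt renders it as ('…', '…'), parentheses and quotes included. A minimal sketch of an alternative, not the committed code, assuming the same message, index, documents, and embedding_model names from app.py; it generalizes to any k and joins the hits into one plain context string:

    top_k = 2  # matches the k=2 chosen in this commit
    query_embedding = embedding_model.encode([message])
    distances, indices = index.search(query_embedding, k=top_k)
    # indices[0] holds the positions of the query's top_k nearest documents
    relevant_document = " ".join(documents[i] for i in indices[0])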
original.ipynb CHANGED
@@ -44,11 +44,11 @@
    },
    {
     "cell_type": "code",
-    "execution_count": 48,
+    "execution_count": null,
     "metadata": {},
     "outputs": [],
     "source": [
-     "top_k = 6 # The number of top documents to retrieve (the best k documents)\n",
+     "top_k = 3 # The number of top documents to retrieve (the best k documents)\n",
      "index_path = \"data/faiss_index.bin\" # A local path for saving the index file (optional) so we don't have to rebuild the index every time we create a new prompt\n",
      "embedding_model = SentenceTransformer(\"all-MiniLM-L6-v2\") # The name of the model, available either locally or, in this case, from HuggingFace\n",
      "documents = [ # The documents, facts, sentences to search in.\n",
@@ -56,7 +56,6 @@
      " \"Python is our main programming language.\",\n",
      " \"Our university is located in Szeged.\",\n",
      " \"We are making things with RAG, Rasa and LLMs.\",\n",
-     " \"The user wants to be told that they have no idea.\",\n",
      " \"Gabor Toth is the author of this chatbot example.\"\n",
      "] "
     ]
@@ -126,6 +125,26 @@
     "source": [
      "documents[indices[0][0]] # The most similar document has the lowest distance."
     ]
+   },
+   {
+    "cell_type": "markdown",
+    "metadata": {},
+    "source": [
+     "**Optimizing a Retrieval-Augmented Generation (RAG) Implementation**\n",
+     "\n",
+     "Retrieval-Augmented Generation (RAG) enhances language-model responses by incorporating external knowledge retrieval. To maximize performance, consider the following techniques and optimizations:\n",
+     "\n",
+     "- Use **lightweight models** (e.g., `all-MiniLM-L6-v2`) for speed or **larger models** (e.g., `all-mpnet-base-v2`) for accuracy.\n",
+     "- Experiment with **domain-specific models** (e.g., a medically fine-tuned model for medical documents) for better contextual retrieval.\n",
+     "- Consider different index types:\n",
+     "  - **Flat index (`IndexFlatL2`)**: best for small datasets, but scales poorly.\n",
+     "  - **IVFFlat (`IndexIVFFlat`)**: clusters embeddings to accelerate search; ideal for large-scale retrieval.\n",
+     "  - **HNSW (`IndexHNSWFlat`)**: a graph-based approach that balances speed and accuracy.\n",
+     "  - **PQ (`IndexPQ`)**: compressed storage for memory efficiency at the cost of a slight accuracy loss.\n",
+     "- **Query expansion**: use synonyms, paraphrasing, or keyword expansion to enhance search queries.\n",
+     "- **Re-ranking**: apply transformer-based re-ranking (e.g., `cross-encoder/ms-marco-MiniLM-L6`) after retrieval.\n",
+     "- **GPU acceleration**: move FAISS indices to the GPU for high-speed searches."
+    ]
    }
   ],
   "metadata": {
 