Chris4K committed on
Commit
103a876
·
verified ·
1 Parent(s): 4cac457

Update rag_tool.py

Browse files
Files changed (1) hide show
  1. rag_tool.py +26 -7
rag_tool.py CHANGED
@@ -1,10 +1,10 @@
1
  import os
2
  from typing import Dict, List, Optional, Union, Any
3
  from smolagents import Tool
4
- from langchain.vectorstores import FAISS, Chroma
5
- from langchain.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceEmbeddings
6
- from langchain.document_loaders import PyPDFLoader, TextLoader, DirectoryLoader
7
- from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
8
  from PyPDF2 import PdfReader
9
  import json
10
 
@@ -23,11 +23,29 @@ class RAGTool(Tool):
23
  "top_k": {
24
  "type": "integer",
25
  "description": "Number of most relevant documents to retrieve (default: 3)",
 
26
  }
27
  }
28
  output_type = "string"
29
 
30
- def __init__(self,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  documents_path: str = "./documents",
32
  embedding_model: str = "BAAI/bge-small-en-v1.5",
33
  vector_store_type: str = "faiss",
@@ -36,7 +54,7 @@ class RAGTool(Tool):
36
  persist_directory: str = "./vector_store",
37
  device: str = "cpu"):
38
  """
39
- Initialize the RAG Tool with configurable parameters.
40
 
41
  Args:
42
  documents_path: Path to documents or folder containing documents
@@ -47,7 +65,6 @@ class RAGTool(Tool):
47
  persist_directory: Directory to persist vector store
48
  device: Device to run embedding model on ('cpu' or 'cuda')
49
  """
50
- super().__init__()
51
  self.documents_path = documents_path
52
  self.embedding_model = embedding_model
53
  self.vector_store_type = vector_store_type
@@ -60,6 +77,8 @@ class RAGTool(Tool):
60
  os.makedirs(persist_directory, exist_ok=True)
61
  self._setup_vector_store()
62
 
 
 
63
  def _setup_vector_store(self):
64
  """Set up the vector store with documents if it doesn't exist"""
65
  # Check if we need to create a new vector store
 
1
  import os
2
  from typing import Dict, List, Optional, Union, Any
3
  from smolagents import Tool
4
+ from langchain_community.vectorstores import FAISS, Chroma
5
+ from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceEmbeddings
6
+ from langchain_community.document_loaders import PyPDFLoader, TextLoader, DirectoryLoader
7
+ from langchain_text_splitters import CharacterTextSplitter, RecursiveCharacterTextSplitter
8
  from PyPDF2 import PdfReader
9
  import json
10
 
 
23
  "top_k": {
24
  "type": "integer",
25
  "description": "Number of most relevant documents to retrieve (default: 3)",
26
+ "nullable": True
27
  }
28
  }
29
  output_type = "string"
30
 
31
+ def __init__(self):
32
+ """
33
+ Initialize the RAG Tool with default settings.
34
+ All configuration is done via class attributes or through the configure method.
35
+ """
36
+ super().__init__()
37
+ self.documents_path = "./documents"
38
+ self.embedding_model = "BAAI/bge-small-en-v1.5"
39
+ self.vector_store_type = "faiss"
40
+ self.chunk_size = 1000
41
+ self.chunk_overlap = 200
42
+ self.persist_directory = "./vector_store"
43
+ self.device = "cpu"
44
+
45
+ # Don't automatically create storage initially, wait for explicit setup
46
+ self.vector_store = None
47
+
48
+ def configure(self,
49
  documents_path: str = "./documents",
50
  embedding_model: str = "BAAI/bge-small-en-v1.5",
51
  vector_store_type: str = "faiss",
 
54
  persist_directory: str = "./vector_store",
55
  device: str = "cpu"):
56
  """
57
+ Configure the RAG Tool with custom parameters.
58
 
59
  Args:
60
  documents_path: Path to documents or folder containing documents
 
65
  persist_directory: Directory to persist vector store
66
  device: Device to run embedding model on ('cpu' or 'cuda')
67
  """
 
68
  self.documents_path = documents_path
69
  self.embedding_model = embedding_model
70
  self.vector_store_type = vector_store_type
 
77
  os.makedirs(persist_directory, exist_ok=True)
78
  self._setup_vector_store()
79
 
80
+ return self
81
+
82
  def _setup_vector_store(self):
83
  """Set up the vector store with documents if it doesn't exist"""
84
  # Check if we need to create a new vector store