rag-tool

Running

App Files Files Community

Chris4K committed on Apr 28

Commit

103a876

verified ·

1 Parent(s): 4cac457

Update rag_tool.py

Browse files

Files changed (1) hide show

rag_tool.py +26 -7

rag_tool.py CHANGED Viewed

@@ -1,10 +1,10 @@
 import os
 from typing import Dict, List, Optional, Union, Any
 from smolagents import Tool
-from langchain.vectorstores import FAISS, Chroma
-from langchain.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceEmbeddings
-from langchain.document_loaders import PyPDFLoader, TextLoader, DirectoryLoader
-from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
 from PyPDF2 import PdfReader
 import json
@@ -23,11 +23,29 @@ class RAGTool(Tool):
         "top_k": {
             "type": "integer",
             "description": "Number of most relevant documents to retrieve (default: 3)",
         }
     }
     output_type = "string"
-    def __init__(self,
                  documents_path: str = "./documents",
                  embedding_model: str = "BAAI/bge-small-en-v1.5",
                  vector_store_type: str = "faiss",
@@ -36,7 +54,7 @@ class RAGTool(Tool):
                  persist_directory: str = "./vector_store",
                  device: str = "cpu"):
         """
-        Initialize the RAG Tool with configurable parameters.
         Args:
             documents_path: Path to documents or folder containing documents
@@ -47,7 +65,6 @@ class RAGTool(Tool):
             persist_directory: Directory to persist vector store
             device: Device to run embedding model on ('cpu' or 'cuda')
         """
-        super().__init__()
         self.documents_path = documents_path
         self.embedding_model = embedding_model
         self.vector_store_type = vector_store_type
@@ -60,6 +77,8 @@ class RAGTool(Tool):
         os.makedirs(persist_directory, exist_ok=True)
         self._setup_vector_store()
     def _setup_vector_store(self):
         """Set up the vector store with documents if it doesn't exist"""
         # Check if we need to create a new vector store

 import os
 from typing import Dict, List, Optional, Union, Any
 from smolagents import Tool
+from langchain_community.vectorstores import FAISS, Chroma
+from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceEmbeddings
+from langchain_community.document_loaders import PyPDFLoader, TextLoader, DirectoryLoader
+from langchain_text_splitters import CharacterTextSplitter, RecursiveCharacterTextSplitter
 from PyPDF2 import PdfReader
 import json
         "top_k": {
             "type": "integer",
             "description": "Number of most relevant documents to retrieve (default: 3)",
+            "nullable": True
         }
     }
     output_type = "string"
+    def __init__(self):
+        """
+        Initialize the RAG Tool with default settings.
+        All configuration is done via class attributes or through the configure method.
+        """
+        super().__init__()
+        self.documents_path = "./documents"  # same value as configure()'s documents_path default
+        self.embedding_model = "BAAI/bge-small-en-v1.5"  # same value as configure()'s embedding_model default
+        self.vector_store_type = "faiss"  # same value as configure()'s vector_store_type default
+        self.chunk_size = 1000  # NOTE(review): presumably characters per chunk — confirm against the text splitter
+        self.chunk_overlap = 200  # NOTE(review): presumably overlap between consecutive chunks — confirm
+        self.persist_directory = "./vector_store"  # same value as configure()'s persist_directory default
+        self.device = "cpu"  # configure() documents the accepted values as 'cpu' or 'cuda'
+        # Don't automatically create storage initially, wait for explicit setup
+        self.vector_store = None  # left as None here; configure() is the method that calls _setup_vector_store()
+    def configure(self,
                  documents_path: str = "./documents",
                  embedding_model: str = "BAAI/bge-small-en-v1.5",
                  vector_store_type: str = "faiss",
                  persist_directory: str = "./vector_store",
                  device: str = "cpu"):
         """
+        Configure the RAG Tool with custom parameters.
         Args:
             documents_path: Path to documents or folder containing documents
             persist_directory: Directory to persist vector store
             device: Device to run embedding model on ('cpu' or 'cuda')
         """
         self.documents_path = documents_path
         self.embedding_model = embedding_model
         self.vector_store_type = vector_store_type
         os.makedirs(persist_directory, exist_ok=True)
         self._setup_vector_store()
+        return self
     def _setup_vector_store(self):
         """Set up the vector store with documents if it doesn't exist"""
         # Check if we need to create a new vector store