Spaces:
Runtime error
Runtime error
Chintan Donda
committed on
Commit
•
3d0d57c
1
Parent(s):
3fa36c8
Replace GPTSimpleVectorIndex import with GPTVectorStoreIndex
Browse files
- src/data_loader.py +2 -2
- src/langchain_utils.py +12 -12
src/data_loader.py
CHANGED
@@ -4,7 +4,7 @@ import pandas as pd
|
|
4 |
from pathlib import Path
|
5 |
import glob
|
6 |
|
7 |
-
from llama_index import
|
8 |
from langchain.document_loaders import PyPDFLoader, TextLoader
|
9 |
from langchain.agents import initialize_agent, Tool
|
10 |
from langchain.llms import OpenAI
|
@@ -170,7 +170,7 @@ class DATA_LOADER:
|
|
170 |
# Load data from URLs
|
171 |
documents = loader.load_data(urls=urls)
|
172 |
# Build the Vector database
|
173 |
-
index =
|
174 |
tools = [
|
175 |
Tool(
|
176 |
name="Website Index",
|
|
|
4 |
from pathlib import Path
|
5 |
import glob
|
6 |
|
7 |
+
from llama_index import GPTVectorStoreIndex, download_loader, SimpleDirectoryReader, SimpleWebPageReader
|
8 |
from langchain.document_loaders import PyPDFLoader, TextLoader
|
9 |
from langchain.agents import initialize_agent, Tool
|
10 |
from langchain.llms import OpenAI
|
|
|
170 |
# Load data from URLs
|
171 |
documents = loader.load_data(urls=urls)
|
172 |
# Build the Vector database
|
173 |
+
index = GPTVectorStoreIndex(documents)
|
174 |
tools = [
|
175 |
Tool(
|
176 |
name="Website Index",
|
src/langchain_utils.py
CHANGED
@@ -13,7 +13,7 @@ import chromadb
|
|
13 |
from langchain.chains.question_answering import load_qa_chain
|
14 |
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
|
15 |
from langchain.prompts import PromptTemplate
|
16 |
-
from llama_index import
|
17 |
from langchain.vectorstores import FAISS
|
18 |
|
19 |
import pickle
|
@@ -365,8 +365,8 @@ class LANGCHAIN_UTILS:
|
|
365 |
)
|
366 |
|
367 |
# Vector store using GPT vector index
|
368 |
-
elif self.index_type == '
|
369 |
-
self.index =
|
370 |
|
371 |
logger.info(f'Index created successfully!')
|
372 |
return self.index
|
@@ -522,7 +522,7 @@ class LANGCHAIN_UTILS:
|
|
522 |
elif self.index_type == 'Chroma':
|
523 |
index.persist()
|
524 |
|
525 |
-
elif self.index_type == '
|
526 |
index.save_to_disk(index_filepath)
|
527 |
|
528 |
elif self.index_type == 'pickle':
|
@@ -550,8 +550,8 @@ class LANGCHAIN_UTILS:
|
|
550 |
embedding_function=self.embeddings
|
551 |
)
|
552 |
|
553 |
-
elif self.index_type == '
|
554 |
-
self.index =
|
555 |
|
556 |
elif self.index_type == 'pickle':
|
557 |
with open(self.index_filepath, "rb") as f:
|
@@ -579,9 +579,9 @@ class LANGCHAIN_UTILS:
|
|
579 |
url_documents
|
580 |
):
|
581 |
# Build the Vector store for docs
|
582 |
-
doc_index =
|
583 |
# Build the Vector store for URLs
|
584 |
-
url_index =
|
585 |
|
586 |
# Set summary of each index
|
587 |
doc_index.set_text("index_from_docs")
|
@@ -628,10 +628,10 @@ class LANGCHAIN_UTILS:
|
|
628 |
continue
|
629 |
raise NotImplementedError
|
630 |
|
631 |
-
elif self.index_type == '
|
632 |
for doc_type, index in doc_type_indices.items():
|
633 |
-
if not index or not isinstance(index,
|
634 |
-
logger.warning(f'{doc_type} index to be merged is not an instance of type llama_index.
|
635 |
continue
|
636 |
raise NotImplementedError
|
637 |
|
@@ -715,7 +715,7 @@ class LANGCHAIN_UTILS:
|
|
715 |
k=similarity_top_k
|
716 |
)
|
717 |
|
718 |
-
elif self.index_type == '
|
719 |
# Querying the index
|
720 |
response = index.query(
|
721 |
question,
|
|
|
13 |
from langchain.chains.question_answering import load_qa_chain
|
14 |
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
|
15 |
from langchain.prompts import PromptTemplate
|
16 |
+
from llama_index import GPTVectorStoreIndex, GPTListIndex
|
17 |
from langchain.vectorstores import FAISS
|
18 |
|
19 |
import pickle
|
|
|
365 |
)
|
366 |
|
367 |
# Vector store using GPT vector index
|
368 |
+
elif self.index_type == 'GPTVectorStoreIndex':
|
369 |
+
self.index = GPTVectorStoreIndex.from_documents(self.documents)
|
370 |
|
371 |
logger.info(f'Index created successfully!')
|
372 |
return self.index
|
|
|
522 |
elif self.index_type == 'Chroma':
|
523 |
index.persist()
|
524 |
|
525 |
+
elif self.index_type == 'GPTVectorStoreIndex':
|
526 |
index.save_to_disk(index_filepath)
|
527 |
|
528 |
elif self.index_type == 'pickle':
|
|
|
550 |
embedding_function=self.embeddings
|
551 |
)
|
552 |
|
553 |
+
elif self.index_type == 'GPTVectorStoreIndex':
|
554 |
+
self.index = GPTVectorStoreIndex.load_from_disk(self.index_filepath)
|
555 |
|
556 |
elif self.index_type == 'pickle':
|
557 |
with open(self.index_filepath, "rb") as f:
|
|
|
579 |
url_documents
|
580 |
):
|
581 |
# Build the Vector store for docs
|
582 |
+
doc_index = GPTVectorStoreIndex.from_documents(doc_documents)
|
583 |
# Build the Vector store for URLs
|
584 |
+
url_index = GPTVectorStoreIndex.from_documents(url_documents)
|
585 |
|
586 |
# Set summary of each index
|
587 |
doc_index.set_text("index_from_docs")
|
|
|
628 |
continue
|
629 |
raise NotImplementedError
|
630 |
|
631 |
+
elif self.index_type == 'GPTVectorStoreIndex':
|
632 |
for doc_type, index in doc_type_indices.items():
|
633 |
+
if not index or not isinstance(index, GPTVectorStoreIndex):
|
634 |
+
logger.warning(f'{doc_type} index to be merged is not an instance of type llama_index.GPTVectorStoreIndex')
|
635 |
continue
|
636 |
raise NotImplementedError
|
637 |
|
|
|
715 |
k=similarity_top_k
|
716 |
)
|
717 |
|
718 |
+
elif self.index_type == 'GPTVectorStoreIndex':
|
719 |
# Querying the index
|
720 |
response = index.query(
|
721 |
question,
|