import os
import warnings

import faiss
import nest_asyncio
from dotenv import load_dotenv
from llama_index.core import Settings, StorageContext, VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.faiss import FaissVectorStore
from llama_parse import LlamaParse

# Module-level setup: read variables from a .env file, allow nested event
# loops, and silence all warnings for the process.
load_dotenv()
nest_asyncio.apply()
warnings.filterwarnings("ignore")

# The FAISS index dimension must equal the length of the vectors produced by
# the embedding model, so the two values are kept side by side.
EMBEDDING_MODEL_NAME = "BAAI/bge-small-en-v1.5"
EMBEDDING_DIMENSION = 384


def get_data(file_path):
    """Parse a file, embed it, and persist a FAISS-backed vector index.

    The file is parsed to markdown with LlamaParse, embedded with the
    HuggingFace model named by ``EMBEDDING_MODEL_NAME``, stored in a flat L2
    FAISS index, and persisted through the index's storage context (no
    directory is passed, so the library's default location is used).

    Side effect: ``Settings.embed_model`` is replaced with the embedding
    model created here.

    Args:
        file_path: Path of the file handed to ``LlamaParse.load_data``.

    Raises:
        ValueError: If the parser returns no documents.
    """
    parser = LlamaParse(
        api_key=os.getenv("LLAMA_CLOUD_API_KEY"),
        result_type="markdown",
    )
    docs = parser.load_data(file_path)
    if not docs:
        # Without this guard an empty index would be built and persisted and
        # the success message below would be printed even though nothing was
        # parsed.
        raise ValueError(f"No documents were parsed from {file_path!r}")

    Settings.embed_model = HuggingFaceEmbedding(model_name=EMBEDDING_MODEL_NAME)

    faiss_index = faiss.IndexFlatL2(EMBEDDING_DIMENSION)
    vector_store = FaissVectorStore(faiss_index=faiss_index)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)

    index = VectorStoreIndex.from_documents(
        docs,
        storage_context=storage_context,
    )
    index.storage_context.persist()
    print("Data Parsed Successfully!!")