"""Index a Hugging Face dataset in a Chroma vector store and launch a Gradio demo.

Steps performed at import/run time:
1. Download the ``Namitg02/Test`` dataset from the Hugging Face Hub.
2. Split its text content into small overlapping chunks.
3. Embed the chunks and persist them in a Chroma store under ``docs/chroma/``.
4. Launch the hosted ``HuggingFaceH4/zephyr-7b-beta`` model in a Gradio UI.
"""
from datasets import load_dataset
import gradio as gr
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings


def _collect_texts(dataset_dict):
    """Return one text string per dataset row, gathered across every split.

    ``load_dataset`` without a ``split`` argument returns a mapping of split
    name to dataset, which cannot be handed to a text splitter directly.

    NOTE(review): the column schema of the dataset is not visible from this
    script, so every non-empty string field of a row is joined and indexed.
    TODO: narrow this to the intended text column once it is confirmed.
    """
    texts = []
    for split in dataset_dict.values():
        for row in split:
            fields = [
                value
                for value in row.values()
                if isinstance(value, str) and value.strip()
            ]
            if fields:
                texts.append("\n".join(fields))
    return texts


dataset = load_dataset("Namitg02/Test")
print(dataset)

texts = _collect_texts(dataset)
if not texts:
    # Fail early with a clear message instead of building an empty index.
    raise ValueError("No text content found in dataset 'Namitg02/Test'")

splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=15,
    separators=["\n\n", "\n", " ", ""],
)
# create_documents() splits each text and wraps the chunks in Document
# objects, which is the input type Chroma.from_documents() expects.
docs = splitter.create_documents(texts)

embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

persist_directory = 'docs/chroma/'

# Chroma computes the embeddings itself from `embedding_model`, so the chunks
# are not encoded separately beforehand.
vectordb = Chroma.from_documents(
    documents=docs,
    embedding=embedding_model,
    persist_directory=persist_directory,
)

# NOTE(review): the retriever is built but not connected to the Gradio app
# below; the hosted model answers without retrieved context.
retriever = vectordb.as_retriever()

gr.load("models/HuggingFaceH4/zephyr-7b-beta").launch()