"""Question answering over a PDF with LangChain, OpenAI, and Astra DB (Cassandra).

Pipeline: read a PDF -> split its text into overlapping chunks -> embed and
store the chunks in an Astra DB vector table via CassIO -> run an interactive
question/answer loop powered by an OpenAI LLM.

Install dependencies first (shell command, NOT Python):

    pip install -qU cassio datasets langchain openai tiktoken PyPDF2
"""

import os

import cassio
from langchain.embeddings import OpenAIEmbeddings
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores.cassandra import Cassandra
from PyPDF2 import PdfReader

# SECURITY: never hard-code credentials in source control — the original file
# embedded live API keys. Read them from the environment instead; a KeyError
# here is an immediate, clear signal that configuration is missing.
ASTRA_DB_APPLICATION_TOKEN = os.environ["ASTRA_DB_APPLICATION_TOKEN"]
ASTRA_DB_ID = os.environ["ASTRA_DB_ID"]          # your Astra database ID
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]    # your OpenAI API key

# Path of the PDF file to index.
PDF_PATH = "Ethics.pdf"


def read_pdf_text(path):
    """Return the concatenated text of every page of the PDF at *path*.

    Pages for which PyPDF2 extracts no text (e.g. image-only pages) are
    skipped. Uses str.join to avoid quadratic += concatenation.
    """
    reader = PdfReader(path)
    pages_text = []
    for page in reader.pages:
        content = page.extract_text()
        if content:
            pages_text.append(content)
    return "".join(pages_text)


def build_vector_index(raw_text):
    """Chunk *raw_text*, embed the chunks, and store them in Astra DB.

    Returns a VectorStoreIndexWrapper ready for querying. Assumes
    cassio.init() has already been called to establish the DB session.
    """
    embedding = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
    astra_vector_store = Cassandra(
        embedding=embedding,
        table_name="qa_mini_demo",
        session=None,   # None -> use the global session set up by cassio.init()
        keyspace=None,  # None -> use the database's default keyspace
    )
    # Chunks must stay small enough for the model's token limit; the
    # 200-character overlap preserves context across chunk boundaries.
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=800,
        chunk_overlap=200,
        length_function=len,
    )
    texts = text_splitter.split_text(raw_text)
    astra_vector_store.add_texts(texts)
    print("Inserted %i chunks." % len(texts))
    return VectorStoreIndexWrapper(vectorstore=astra_vector_store)


def run_qa_loop(index, llm):
    """Interactive REPL: answer questions until the user types 'quit'.

    Empty input re-prompts; 'quit' (any case) exits the loop.
    """
    first_question = True
    while True:
        if first_question:
            prompt = "\nEnter your question (or type 'quit' to exit): "
        else:
            prompt = "\nWhat's your next question (or type 'quit' to exit): "
        query_text = input(prompt).strip()
        if query_text.lower() == "quit":
            break
        if query_text == "":
            continue
        first_question = False

        print("\nQUESTION: \"%s\"" % query_text)
        answer = index.query(query_text, llm=llm).strip()
        print("ANSWER: \"%s\"\n" % answer)


def main():
    """Wire the pipeline together: connect, ingest the PDF, then answer questions."""
    # Initialize the CassIO-backed Astra DB connection before any vector ops.
    cassio.init(token=ASTRA_DB_APPLICATION_TOKEN, database_id=ASTRA_DB_ID)
    llm = OpenAI(openai_api_key=OPENAI_API_KEY)

    raw_text = read_pdf_text(PDF_PATH)
    index = build_vector_index(raw_text)
    run_qa_loop(index, llm)


if __name__ == "__main__":
    main()