# NOTE: residue scraped from the hosting page, not part of the script:
# "Spaces: Runtime error"
# Install the dependencies from a shell (or a requirements file), not from
# inside Python -- "pip install ..." is not valid Python syntax:
#   pip install -qU cassio datasets langchain openai tiktoken
#   pip install PyPDF2
import os

# With CassIO, the engine powering the Astra DB integration in LangChain,
# you will also initialize the DB connection:
import cassio
# Support for dataset retrieval with Hugging Face
from datasets import load_dataset
# LangChain components to use
from langchain.embeddings import OpenAIEmbeddings
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
from langchain.llms import OpenAI
from langchain.vectorstores.cassandra import Cassandra
from PyPDF2 import PdfReader
# Credentials are read from the environment so that no secret is stored in the
# source file. Set these variables before starting the script:
#   ASTRA_DB_APPLICATION_TOKEN  - Astra DB application token
#   ASTRA_DB_ID                 - Astra DB database ID
#   OPENAI_API_KEY              - OpenAI API key
# SECURITY: the values that used to be hard-coded here were exposed in source
# and must be treated as compromised -- revoke and rotate them.
def _require_env(name):
    """Return the non-empty value of environment variable *name*.

    Raises:
        SystemExit: if the variable is unset or blank, so the script stops
            with a clear message instead of failing later inside a client
            library.
    """
    value = os.environ.get(name, "").strip()
    if not value:
        raise SystemExit(f"Missing required environment variable: {name}")
    return value


ASTRA_DB_APPLICATION_TOKEN = _require_env("ASTRA_DB_APPLICATION_TOKEN")
ASTRA_DB_ID = _require_env("ASTRA_DB_ID")
OPENAI_API_KEY = _require_env("OPENAI_API_KEY")
# Path of the PDF file to index.
pdfreader = PdfReader('Ethics.pdf')
# NOTE(review): Concatenate is not referenced anywhere in the visible script;
# the import is kept in case other code relies on it.
from typing_extensions import Concatenate

# Read the text of every page. Pages for which extract_text() yields nothing
# (None or an empty string) contribute no text.
raw_text = "".join(page.extract_text() or "" for page in pdfreader.pages)
# Initialize the Astra DB connection through CassIO.
cassio.init(token=ASTRA_DB_APPLICATION_TOKEN, database_id=ASTRA_DB_ID)

# The LLM answers the questions; the embedding model vectorizes the text.
llm = OpenAI(openai_api_key=OPENAI_API_KEY)
embedding = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

# Vector store backed by the "qa_mini_demo" table.
# NOTE(review): session=None / keyspace=None presumably make the store fall
# back to the connection set up by cassio.init() above -- confirm against the
# LangChain Cassandra vector store documentation.
astra_vector_store = Cassandra(
    embedding=embedding,
    table_name="qa_mini_demo",
    session=None,
    keyspace=None,
)
from langchain.text_splitter import CharacterTextSplitter

# Split the text on newlines into overlapping chunks so that a single chunk
# does not push the token count too high. Sizes are measured in characters
# because length_function is len.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=800,
    chunk_overlap=200,
    length_function=len,
)
texts = text_splitter.split_text(raw_text)

# Store every chunk in the vector store.
astra_vector_store.add_texts(texts)
# NOTE: the message says "headlines", but what was inserted are the PDF text
# chunks; the wording is kept unchanged so the script's output stays the same.
print(f"Inserted {len(texts)} headlines.")
# Wrap the vector store in an index object that exposes query().
astra_vector_index = VectorStoreIndexWrapper(vectorstore=astra_vector_store)

# Interactive question/answer loop. It keeps prompting until the user types
# "quit" (case-insensitive) or the input stream ends; blank input is ignored
# and the prompt is shown again.
first_question = True
while True:
    prompt = (
        "\nEnter your question (or type 'quit' to exit): "
        if first_question
        else "\nWhat's your next question (or type 'quit' to exit): "
    )
    try:
        query_text = input(prompt).strip()
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or Ctrl-C: finish cleanly instead of dumping a traceback.
        print()
        break

    if query_text.lower() == "quit":
        break
    if not query_text:
        continue

    first_question = False
    print(f'\nQUESTION: "{query_text}"')
    answer = astra_vector_index.query(query_text, llm=llm).strip()
    print(f'ANSWER: "{answer}"\n')