Spaces:
Runtime error
Runtime error
import streamlit as st | |
from PyPDF2 import PdfReader | |
from langchain.embeddings.openai import OpenAIEmbeddings | |
from langchain.text_splitter import CharacterTextSplitter | |
from langchain.vectorstores import FAISS | |
from langchain.agents import initialize_agent, AgentType | |
from langchain.callbacks import StreamlitCallbackHandler | |
from langchain.chat_models import ChatOpenAI | |
from langchain.chains.question_answering import load_qa_chain | |
from langchain.llms import OpenAI | |
from langchain_openai import ChatOpenAI | |
from langchain_core.output_parsers import StrOutputParser | |
from langchain_core.runnables import RunnablePassthrough | |
import os | |
from dotenv import load_dotenv | |
load_dotenv() | |
# provide the path of pdf file/files. | |
pdfreader = PdfReader('input_data/nvidia_10k.pdf') | |
with st.sidebar: | |
openai_api_key = st.text_input("OpenAI API Key", type="password") | |
"[Get an OpenAI API key](https://platform.openai.com/account/api-keys)" | |
def split_chunk_text(input_path="input_data/nvidia_10k.pdf"): | |
from typing_extensions import Concatenate | |
# read text from pdf | |
pdfreader = PdfReader(input_path) | |
raw_text = '' | |
for i, page in enumerate(pdfreader.pages): | |
content = page.extract_text() | |
if content: | |
raw_text += content | |
text_splitter = CharacterTextSplitter( | |
separator = "\n", | |
chunk_size = 800, | |
chunk_overlap = 200, | |
length_function = len, | |
) | |
texts = text_splitter.split_text(raw_text) | |
return texts | |
with st.form("my_form"): | |
texts = split_chunk_text() | |
embeddings = OpenAIEmbeddings( | |
model="text-embedding-3-small" | |
) | |
vector_store = FAISS.from_texts(texts, embeddings) | |
retriever = vector_store.as_retriever() | |
text = st.text_area("Enter question:", " ") | |
submitted = st.form_submit_button("Submit") | |
if not openai_api_key: | |
st.info("Please add your OpenAI API key to continue.") | |
elif submitted: | |
#texts = split_chunk_text() | |
from operator import itemgetter | |
from langchain.prompts import ChatPromptTemplate | |
template = """Answer the question based only on the following context. If you cannot answer the question with the context, please respond with 'I don't know': | |
Context: | |
{context} | |
Question: | |
{question} | |
""" | |
prompt = ChatPromptTemplate.from_template(template) | |
primary_qa_llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0) | |
retrieval_augmented_qa_chain = ( | |
# INVOKE CHAIN WITH: {"question" : "<<SOME USER QUESTION>>"} | |
# "question" : populated by getting the value of the "question" key | |
# "context" : populated by getting the value of the "question" key and chaining it into the base_retriever | |
{"context": itemgetter("question") | retriever, "question": itemgetter("question")} | |
# "context" : is assigned to a RunnablePassthrough object (will not be called or considered in the next step) | |
# by getting the value of the "context" key from the previous step | |
| RunnablePassthrough.assign(context=itemgetter("context")) | |
# "response" : the "context" and "question" values are used to format our prompt object and then piped | |
# into the LLM and stored in a key called "response" | |
# "context" : populated by getting the value of the "context" key from the previous step | |
| {"response": prompt | primary_qa_llm, "context": itemgetter("context")} | |
) | |
#query = "Who is liable in case of an accident if a learner is driving with an instructor?" | |
result = retrieval_augmented_qa_chain.invoke({"question" : text}) | |
st.info(result["response"].content) |