# document / app.py
# Nikhil0987's picture
# Update app.py
# c1b78a4 verified
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import CharacterTextSplitter
import os
import pinecone
from langchain.vectorstores import Pinecone
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
import streamlit as st
from dotenv import load_dotenv
# Load secrets from a local .env file (no-op if the file is absent) and
# expose them as module-level constants.
load_dotenv()

PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
PINECONE_ENV = os.getenv('PINECONE_ENV')
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

# Bug fix: os.environ values must be strings — assigning None (key missing
# from the environment/.env) raised TypeError at import time. Only export
# the key when it is actually set; downstream OpenAI clients read it from
# the environment.
if OPENAI_API_KEY:
    os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
@st.cache_resource
def embedding_db():
    """Build (and cache, via Streamlit) the Pinecone vector store.

    Loads and splits the local PDFs with doc_preprocessing(), embeds them
    with OpenAI embeddings, and upserts them into the
    'langchain-demo-indexes' Pinecone index.

    Returns:
        A langchain ``Pinecone`` vector store wrapping the populated index.
    """
    embeddings = OpenAIEmbeddings()
    # Bug fix: the original `from pinecone import Pinecone` shadowed the
    # langchain `Pinecone` vector store imported at module level, so the
    # subsequent `Pinecone.from_documents(...)` hit the *client* class and
    # raised AttributeError. Alias the client import to avoid the clash.
    from pinecone import Pinecone as PineconeClient
    # Instantiating the client configures the connection for this process.
    # NOTE(review): the v3 client only requires api_key; `environment` is
    # kept for parity with the original call — confirm against the
    # installed pinecone-client version.
    PineconeClient(
        api_key=PINECONE_API_KEY,
        environment=PINECONE_ENV,
    )
    docs_split = doc_preprocessing()
    # Bug fix: `client=...` is not an accepted keyword of the langchain
    # vectorstore's from_documents and was removed.
    doc_db = Pinecone.from_documents(
        docs_split,
        embeddings,
        index_name='langchain-demo-indexes',
    )
    return doc_db
def doc_preprocessing():
    """Load every PDF under ``data/`` and split it into ~1000-char chunks.

    Returns:
        A list of langchain document chunks ready for embedding.
    """
    pdf_loader = DirectoryLoader(
        'data/',
        glob='**/*.pdf',
        show_progress=True,
    )
    documents = pdf_loader.load()
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    return splitter.split_documents(documents)
def retrieval_answer(query, doc_db=None):
    """Answer *query* with a RetrievalQA chain over the Pinecone store.

    Args:
        query: The user's natural-language question.
        doc_db: Optional vector store to retrieve from. When None (the
            backward-compatible default), the cached store built by
            ``embedding_db()`` is used.

    Returns:
        The chain's answer string.
    """
    # Bug fix: the original read a module-global `doc_db` that was never
    # defined (it only existed as a local inside main()), so every call
    # raised NameError. Resolve the store explicitly instead.
    if doc_db is None:
        doc_db = embedding_db()
    chat_model = ChatOpenAI()
    qa = RetrievalQA.from_chain_type(
        llm=chat_model,
        chain_type='stuff',  # "stuff" = put all retrieved docs in one prompt
        retriever=doc_db.as_retriever(),
    )
    return qa.run(query)
def main():
    """Render the Streamlit Q&A page and answer submitted queries."""
    st.title("Question and Answering App powered by LLM and Pinecone")
    query = st.text_input("Ask your query...")
    # Guard clauses replace the original nested ifs: bail out unless the
    # button was clicked with a non-empty query.
    if not st.button("Ask Query"):
        return
    if len(query) == 0:
        return
    st.info("Your Query: " + query)
    with st.spinner("Processing your query..."):
        embedding_db()  # ensure the (cached) vector store exists
        answer = retrieval_answer(query)
        st.success(answer)


if __name__ == "__main__":
    main()