document / app.py
Nikhil0987's picture
Update app.py
d8ba315 verified
raw
history blame
2.37 kB
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import CharacterTextSplitter
import os
import pinecone
from langchain.vectorstores import Pinecone
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
import streamlit as st
from dotenv import load_dotenv
# Load variables from a local .env file (if any) into the process environment
# before they are read below.
load_dotenv()

# Credentials / settings read from the environment; each is None when unset.
PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
PINECONE_ENV = os.getenv('PINECONE_ENV')
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

# os.environ only accepts str values, so writing None here would raise a
# TypeError at import time. Re-export the key only when it is actually set.
if OPENAI_API_KEY is not None:
    os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
def doc_preprocessing():
    """Load the PDFs found under ``data/`` and split them into chunks.

    Returns:
        The documents produced by ``CharacterTextSplitter.split_documents``
        for everything the directory loader returned.
    """
    # Recursive glob: only the PDFs below data/ are loaded.
    raw_documents = DirectoryLoader(
        'data/',
        glob='**/*.pdf',
        show_progress=True,
    ).load()

    # chunk_size=1000 with no overlap between consecutive chunks.
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    return splitter.split_documents(raw_documents)
@st.cache_resource
def embedding_db():
    """Build the Pinecone-backed vector store for the documents in ``data/``.

    Creates the ``langchain-demo-indexes`` index when it is not listed yet,
    then embeds the chunks returned by ``doc_preprocessing()`` with
    ``OpenAIEmbeddings`` and stores them in that index.

    The function is wrapped in ``st.cache_resource``.

    Returns:
        The vector store returned by ``Pinecone.from_documents``.
    """
    index_name = 'langchain-demo-indexes'

    # we use the openAI embedding model
    embeddings = OpenAIEmbeddings()

    # In this module the bare name `Pinecone` is LangChain's vector store
    # (from langchain.vectorstores), not the Pinecone SDK client. Index
    # management (list_indexes / create_index) lives on the SDK client, so it
    # is reached through the `pinecone` module explicitly.
    # NOTE(review): assumes pinecone SDK v3+, where the client is built from
    # the API key alone; `environment` is not passed — confirm against the
    # pinned SDK version.
    pc = pinecone.Pinecone(api_key=PINECONE_API_KEY)

    docs_split = doc_preprocessing()

    # Check if index exists, create if needed
    if index_name not in pc.list_indexes().names():
        pc.create_index(
            name=index_name,
            dimension=1536,  # must match the embedding vector size
            metric='euclidean',
            spec=pinecone.ServerlessSpec(cloud='aws', region='us-west-2'),
        )

    # from_documents is not given a `client` argument: extra keyword arguments
    # are forwarded to the vector-store constructor, which does not take one.
    # NOTE(review): LangChain presumably picks up PINECONE_API_KEY from the
    # environment to open the index — verify for the installed version.
    # NOTE(review): every uncached call embeds and inserts all chunks again;
    # consider reusing an already-populated index.
    return Pinecone.from_documents(
        docs_split,
        embeddings,
        index_name=index_name,
    )
# Chat model instance used by retrieval_answer(); built with ChatOpenAI's defaults.
llm = ChatOpenAI()
# Vector store used as the retriever source. Built when the module runs;
# embedding_db() is wrapped in st.cache_resource.
doc_db = embedding_db()
def retrieval_answer(query):
    """Answer *query* with a retrieval-augmented QA chain.

    Builds a ``RetrievalQA`` chain of type ``'stuff'`` from the module-level
    ``llm`` and a retriever obtained from the module-level ``doc_db``, then
    runs the chain on the query.

    Args:
        query: The question to answer.

    Returns:
        Whatever ``qa.run(query)`` returns.
    """
    qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type='stuff',
        retriever=doc_db.as_retriever(),
    )
    return qa.run(query)
def main():
    """Render the Streamlit page: a query box, a button and the answer.

    When the button is pressed with a non-blank query, the query is echoed,
    passed to ``retrieval_answer`` and the answer is displayed. A blank or
    whitespace-only query shows a warning instead of calling the model.
    """
    st.title("Question and Answering App powered by LLM and Pinecone")
    text_input = st.text_input("Ask your query...")

    if not st.button("Ask Query"):
        return

    # Ignore surrounding whitespace so a blank-looking query is not sent on.
    query = text_input.strip()
    if not query:
        st.warning("Please enter a query first.")
        return

    st.info("Your Query: " + query)
    answer = retrieval_answer(query)
    st.success(answer)


if __name__ == "__main__":
    main()