from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import CharacterTextSplitter
import os
# The Pinecone client class is aliased so it does not shadow LangChain's
# Pinecone vectorstore imported below; ServerlessSpec is needed for create_index().
from pinecone import Pinecone as PineconeClient, ServerlessSpec
from langchain.vectorstores import Pinecone
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
import streamlit as st
from dotenv import load_dotenv

load_dotenv()

PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
PINECONE_ENV = os.getenv('PINECONE_ENV')
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
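
# A minimal .env file for this app would look roughly like the following
# (placeholder values; substitute your own keys and Pinecone settings):
#
#   PINECONE_API_KEY=your-pinecone-api-key
#   PINECONE_ENV=your-pinecone-environment
#   OPENAI_API_KEY=your-openai-api-key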

def doc_preprocessing():
    # Load every PDF under data/ and split it into ~1000-character chunks.
    loader = DirectoryLoader(
        'data/',
        glob='**/*.pdf',  # only the PDFs
        show_progress=True
    )
    docs = loader.load()

    text_splitter = CharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=0
    )
    docs_split = text_splitter.split_documents(docs)
    return docs_split
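
# A possible variation (not used above): RecursiveCharacterTextSplitter splits on
# paragraph and sentence boundaries before falling back to characters, and a small
# overlap helps keep context across chunk boundaries. Sketch only:
#
#   from langchain.text_splitter import RecursiveCharacterTextSplitter
#   text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
#   docs_split = text_splitter.split_documents(docs)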

def embedding_db():
    # We use the OpenAI embedding model.
    embeddings = OpenAIEmbeddings()

    # Initialize the Pinecone client. With a serverless index the region is
    # given via ServerlessSpec below, so only the API key is needed here.
    pc = PineconeClient(api_key=PINECONE_API_KEY)

    docs_split = doc_preprocessing()

    # Check if the index exists; create it if needed.
    if 'langchain-demo-indexes' not in pc.list_indexes().names():
        pc.create_index(
            name='langchain-demo-indexes',
            dimension=1536,  # matches the OpenAI embedding model; adjust if needed
            metric='euclidean',
            spec=ServerlessSpec(cloud='aws', region='us-west-2')
        )

    # The LangChain Pinecone vectorstore reads PINECONE_API_KEY from the
    # environment (loaded above), so the client object is not passed in.
    doc_db = Pinecone.from_documents(
        docs_split,
        embeddings,
        index_name='langchain-demo-indexes'
    )
    return doc_db
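
# Note that embedding_db() re-embeds and upserts every document on each run. If
# the index is already populated, a lighter-weight sketch (same vectorstore,
# same index name as above) would be to attach to the existing index instead:
#
#   doc_db = Pinecone.from_existing_index('langchain-demo-indexes', OpenAIEmbeddings())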

llm = ChatOpenAI()
doc_db = embedding_db()

def retrieval_answer(query):
    # Build a simple "stuff" RetrievalQA chain over the Pinecone retriever
    # and run the user's query through it.
    qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type='stuff',
        retriever=doc_db.as_retriever(),
    )
    result = qa.run(query)
    return result
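
# Optional sketch (hypothetical extension, not wired into the UI below): the
# chain can also return the retrieved chunks so the app could display sources.
#
#   qa = RetrievalQA.from_chain_type(
#       llm=llm,
#       chain_type='stuff',
#       retriever=doc_db.as_retriever(),
#       return_source_documents=True,
#   )
#   output = qa({'query': query})
#   answer, sources = output['result'], output['source_documents']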

def main():
    st.title("Question Answering App powered by LLM and Pinecone")

    text_input = st.text_input("Ask your query...")
    if st.button("Ask Query"):
        if len(text_input) > 0:
            st.info("Your Query: " + text_input)
            answer = retrieval_answer(text_input)
            st.success(answer)


if __name__ == "__main__":
    main()