# Page-capture residue (not part of the program): "Spaces: Sleeping"
from langchain.document_loaders import DirectoryLoader | |
from langchain.text_splitter import CharacterTextSplitter | |
import os | |
import pinecone | |
from langchain.vectorstores import Pinecone | |
from langchain.embeddings.openai import OpenAIEmbeddings | |
from langchain.chains import RetrievalQA | |
from langchain.chat_models import ChatOpenAI | |
import streamlit as st | |
from dotenv import load_dotenv | |
# Read variables from a local .env file (if any) into the process environment
# before the keys are looked up below.
load_dotenv()

# Credentials/configuration; each is None when the variable is not set.
PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
PINECONE_ENV = os.getenv('PINECONE_ENV')
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

# os.environ only accepts str values, so writing back a missing key (None)
# would raise TypeError while the module is still loading. Skip the write in
# that case; when the key is present this assignment is kept as in the
# original.
if OPENAI_API_KEY is not None:
    os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
def embedding_db():
    """Embed the preprocessed documents and store them in the Pinecone index.

    The chunks returned by ``doc_preprocessing()`` are embedded with
    ``OpenAIEmbeddings`` and written to the ``langchain-demo-indexes`` index
    through LangChain's ``Pinecone`` vector store (the module-level import
    from ``langchain.vectorstores``).

    Returns:
        The vector store object returned by ``Pinecone.from_documents``.
    """
    embeddings = OpenAIEmbeddings()
    docs_split = doc_preprocessing()

    # Do NOT add ``from pinecone import Pinecone`` inside this function: it
    # would shadow LangChain's ``Pinecone`` vector store with the Pinecone
    # client class, which has no ``from_documents`` method.
    # NOTE(review): with pinecone-client >= 3, LangChain's store is expected
    # to build its own client from the PINECONE_API_KEY environment variable,
    # so no client object is passed here - confirm against the installed
    # langchain / pinecone-client versions.
    return Pinecone.from_documents(
        docs_split,
        embeddings,
        index_name='langchain-demo-indexes',
    )
def doc_preprocessing():
    """Load the PDFs found under ``data/`` and split them into chunks.

    Files are matched recursively with the ``**/*.pdf`` glob. The loaded
    documents are split by a ``CharacterTextSplitter`` configured with
    ``chunk_size=1000`` and ``chunk_overlap=0``.

    Returns:
        The result of ``split_documents`` for the loaded documents.
    """
    pdf_loader = DirectoryLoader('data/', glob='**/*.pdf', show_progress=True)
    loaded_docs = pdf_loader.load()

    splitter_options = {'chunk_size': 1000, 'chunk_overlap': 0}
    splitter = CharacterTextSplitter(**splitter_options)
    return splitter.split_documents(loaded_docs)
def retrieval_answer(query, doc_db=None):
    """Answer ``query`` with a RetrievalQA chain over a vector store.

    Args:
        query: The question text handed to the chain.
        doc_db: Vector store exposing ``as_retriever()``. When omitted, the
            module-level ``doc_db`` global is used, so existing one-argument
            callers keep working.

    Returns:
        The value returned by ``qa.run(query)``.

    Raises:
        RuntimeError: If no vector store was passed and no module-level
            ``doc_db`` has been set.
    """
    if doc_db is None:
        # The parameter shadows the global of the same name, so the global
        # has to be looked up explicitly.
        doc_db = globals().get('doc_db')
    if doc_db is None:
        # Fail with an actionable message instead of a bare NameError.
        raise RuntimeError(
            "No vector store available: pass doc_db=... or set the "
            "module-level doc_db before calling retrieval_answer()."
        )

    chat_model = ChatOpenAI()
    qa = RetrievalQA.from_chain_type(
        llm=chat_model,
        chain_type='stuff',
        retriever=doc_db.as_retriever(),
    )
    return qa.run(query)
def main():
    """Render the Streamlit question-answering page.

    Shows a title, a text input and an "Ask Query" button. When the button
    is pressed with a non-empty query, the query is echoed, the vector store
    is obtained, and the answer from ``retrieval_answer`` is displayed.
    """
    # retrieval_answer() reads the vector store from the module-level name
    # ``doc_db``; without this declaration the assignment below would only
    # create a local variable and the lookup there would fail with NameError.
    global doc_db

    st.title("Question and Answering App powered by LLM and Pinecone")
    text_input = st.text_input("Ask your query...")

    # The button is evaluated first, exactly as in the nested form.
    if st.button("Ask Query") and text_input:
        st.info("Your Query: " + text_input)
        with st.spinner("Processing your query..."):
            # Build the vector store once per browser session rather than on
            # every query: rebuilding re-embeds and re-uploads all documents
            # each time the button is pressed.
            if 'doc_db' not in st.session_state:
                st.session_state['doc_db'] = embedding_db()
            doc_db = st.session_state['doc_db']
            answer = retrieval_answer(text_input)
        st.success(answer)
# Entry point: build the page only when this file is run as the main script.
if __name__ == "__main__":
    main()