Spaces:
Sleeping
Sleeping
File size: 2,247 Bytes
6da0fd1 2b9a300 6da0fd1 2b9a300 6da0fd1 bebc0af 330c818 bebc0af ca6013c bba1f1f 6da0fd1 bebc0af ca6013c 2b9a300 c1b78a4 ca6013c 2b9a300 c1b78a4 2b9a300 c1b78a4 2b9a300 ca6013c 2b9a300 ca6013c 2b9a300 ca6013c c33fb07 ca6013c 2b9a300 ca6013c 2b9a300 ca6013c 2b9a300 ca6013c 2b9a300 ca6013c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import CharacterTextSplitter
import os
import pinecone
from langchain.vectorstores import Pinecone
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
import streamlit as st
from dotenv import load_dotenv
# Load configuration from a local .env file so API keys stay out of the source.
load_dotenv()

PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
PINECONE_ENV = os.getenv('PINECONE_ENV')
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

# Fail fast with a clear message: os.getenv returns None for a missing key,
# and the original os.environ[...] = None assignment raised an opaque
# "str expected, not NoneType" TypeError at import time.
if not OPENAI_API_KEY:
    raise RuntimeError("OPENAI_API_KEY is not set; add it to your .env file")
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
@st.cache_resource
def embedding_db():
    """Build (and cache) the Pinecone vector store over the local documents.

    Returns:
        A langchain ``Pinecone`` vector store backed by the
        'langchain-demo-indexes' index, populated with the document chunks
        produced by ``doc_preprocessing()``.
    """
    embeddings = OpenAIEmbeddings()

    # Initialise the pinecone client module once; the langchain ``Pinecone``
    # vector-store wrapper imported at the top of the file uses this
    # connection implicitly.
    # NOTE: the original did ``from pinecone import Pinecone`` here, which
    # shadowed langchain's vector-store class of the same name — the
    # subsequent ``Pinecone.from_documents(..., client=pc)`` therefore hit
    # the raw client class (no such method, and ``client`` is not a valid
    # keyword for the langchain API) and crashed.
    pinecone.init(
        api_key=PINECONE_API_KEY,
        environment=PINECONE_ENV,
    )

    docs_split = doc_preprocessing()
    # ``from_documents`` embeds every chunk and upserts it into the index.
    doc_db = Pinecone.from_documents(
        docs_split,
        embeddings,
        index_name='langchain-demo-indexes',
    )
    return doc_db
def doc_preprocessing():
    """Load every PDF under ``data/`` and split it into ~1000-character chunks.

    Returns:
        The list of split ``Document`` chunks, ready for embedding.
    """
    pdf_loader = DirectoryLoader(
        'data/',
        glob='**/*.pdf',
        show_progress=True,
    )
    loaded_docs = pdf_loader.load()

    # No overlap between neighbouring chunks.
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    return splitter.split_documents(loaded_docs)
def retrieval_answer(query, doc_db=None):
    """Answer ``query`` with a RetrievalQA chain over the Pinecone store.

    Args:
        query: The user's natural-language question.
        doc_db: An existing vector store to retrieve from. Defaults to
            ``None`` (preserving the old one-argument call signature), in
            which case the cached store from ``embedding_db()`` is used.
            The original body referenced a bare ``doc_db`` name that was
            never defined at module scope — it was a local inside
            ``main()`` — so every query raised ``NameError``.

    Returns:
        The chain's answer string.
    """
    if doc_db is None:
        # st.cache_resource on embedding_db makes this cheap after the
        # first call.
        doc_db = embedding_db()

    chat_model = ChatOpenAI()
    qa = RetrievalQA.from_chain_type(
        llm=chat_model,
        chain_type='stuff',
        retriever=doc_db.as_retriever(),
    )
    return qa.run(query)
def main():
    """Streamlit entry point: collect a question and display the LLM answer."""
    st.title("Question and Answering App powered by LLM and Pinecone")

    text_input = st.text_input("Ask your query...")
    if st.button("Ask Query"):
        if len(text_input) > 0:
            st.info("Your Query: " + text_input)
            with st.spinner("Processing your query..."):
                # Warm the cached vector store before answering.
                doc_db = embedding_db()
                answer = retrieval_answer(text_input)
                st.success(answer)
        else:
            # The original silently did nothing on an empty query;
            # give the user feedback instead.
            st.warning("Please enter a query before clicking 'Ask Query'.")


# Removed the stray " |" scrape artifact that followed main() and made the
# file a syntax error.
if __name__ == "__main__":
    main()