from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import CharacterTextSplitter
import os
import pinecone
from langchain.vectorstores import Pinecone
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
import streamlit as st
from dotenv import load_dotenv

# Load API credentials from a local .env file.
load_dotenv()
PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
PINECONE_ENV = os.getenv('PINECONE_ENV')
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
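
# Expected .env layout (values below are placeholders, not real keys):
#
#   PINECONE_API_KEY=<your-pinecone-api-key>
#   PINECONE_ENV=<your-pinecone-environment>
#   OPENAI_API_KEY=<your-openai-api-key>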


def doc_preprocessing():
    # Load every PDF under data/ and split it into 1,000-character
    # chunks (no overlap) ready for embedding.
    loader = DirectoryLoader(
        'data/',
        glob='**/*.pdf',  # only the PDFs
        show_progress=True
    )
    docs = loader.load()
    text_splitter = CharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=0
    )
    docs_split = text_splitter.split_documents(docs)
    return docs_split


@st.cache_resource
def embedding_db():
    # Use the OpenAI embedding model to vectorise each chunk.
    embeddings = OpenAIEmbeddings()
    pinecone.init(
        api_key=PINECONE_API_KEY,
        environment=PINECONE_ENV
    )
    docs_split = doc_preprocessing()
    # Embed the chunks and upsert them into the Pinecone index.
    doc_db = Pinecone.from_documents(
        docs_split,
        embeddings,
        index_name='langchain-demo-indexes'
    )
    return doc_db


# Instantiate the chat model once and build (or reuse the cached)
# Pinecone vector store at startup.
llm = ChatOpenAI()
doc_db = embedding_db()


def retrieval_answer(query):
    # 'stuff' packs all retrieved chunks into a single prompt for the LLM.
    qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type='stuff',
        retriever=doc_db.as_retriever(),
    )
    result = qa.run(query)
    return result


def main():
    st.title("Question-Answering App powered by LLM and Pinecone")

    text_input = st.text_input("Ask your query...")
    if st.button("Ask Query"):
        if len(text_input) > 0:
            st.info("Your Query: " + text_input)
            answer = retrieval_answer(text_input)
            st.success(answer)


if __name__ == "__main__":
    main()
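
# A rough sketch of how to run this app locally (assumes the script is
# saved as app.py, the PDFs to index live under data/, and the Pinecone
# index 'langchain-demo-indexes' already exists in your project; the
# dependency list is approximate):
#
#   pip install streamlit langchain pinecone-client openai python-dotenv "unstructured[pdf]"
#   streamlit run app.py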