import os

from dotenv import load_dotenv
import streamlit as st
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import DirectoryLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Pinecone
from pinecone import Pinecone as PineconeClient, ServerlessSpec
load_dotenv()
PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
PINECONE_ENV = os.getenv('PINECONE_ENV')  # only needed by the legacy v2 pinecone client
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
if OPENAI_API_KEY:
    # Expose the key to the OpenAI client libraries.
    os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
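# Assumption: this script targets pinecone-client >= 3 (the Pinecone /
# ServerlessSpec objects used below) and a LangChain release that still ships
# langchain.vectorstores.Pinecone; on newer stacks the equivalent classes
# live in the langchain-pinecone and langchain-openai packages.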
def doc_preprocessing():
    # Load every PDF under data/ and split it into 1000-character chunks.
    loader = DirectoryLoader(
        'data/',
        glob='**/*.pdf',  # only the PDFs
        show_progress=True,
    )
    docs = loader.load()
    text_splitter = CharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=0,
    )
    docs_split = text_splitter.split_documents(docs)
    return docs_split


@st.cache_resource
def embedding_db():
    # We use the OpenAI embedding model (1536-dimensional vectors).
    embeddings = OpenAIEmbeddings()
    # pinecone-client v3+ replaces the removed module-level pinecone.init()
    # call with a client object.
    pc = PineconeClient(api_key=PINECONE_API_KEY)
    docs_split = doc_preprocessing()
    # Create the index on first run if it does not exist yet.
    if 'langchain-demo-indexes' not in pc.list_indexes().names():
        pc.create_index(
            name='langchain-demo-indexes',
            dimension=1536,  # must match the embedding model's output size
            metric='euclidean',
            spec=ServerlessSpec(cloud='aws', region='us-west-2'),
        )
    # Recent LangChain Pinecone wrappers read PINECONE_API_KEY from the
    # environment; from_documents() does not take a client= argument.
    doc_db = Pinecone.from_documents(
        docs_split,
        embeddings,
        index_name='langchain-demo-indexes',
    )
    return doc_db
# st.cache_resource caches the return value across Streamlit reruns, so the
# PDFs are loaded, split and embedded once per process rather than per query.
doc_db = embedding_db()
def retrieval_answer(query):
    chat_model = ChatOpenAI()
    qa = RetrievalQA.from_chain_type(
        llm=chat_model,
        chain_type='stuff',  # stuff all retrieved chunks into a single prompt
        retriever=doc_db.as_retriever(),
    )
    result = qa.run(query)
    return result
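# Note: newer LangChain releases deprecate chain.run() in favour of
# qa.invoke({"query": query})["result"]; the .run() form matches the older
# LangChain API this script is written against.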
def main():
    st.title("Question and Answering App powered by LLM and Pinecone")
    text_input = st.text_input("Ask your query...")
    if st.button("Ask Query"):
        if len(text_input) > 0:
            st.info("Your Query: " + text_input)
            answer = retrieval_answer(text_input)
            st.success(answer)


if __name__ == "__main__":
    main()
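# Local usage sketch (assuming this file is the Space's app.py entry point):
# run `streamlit run app.py` with PINECONE_API_KEY, PINECONE_ENV and
# OPENAI_API_KEY in a .env file next to the script, and the source PDFs
# placed under data/.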