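"""Chat-with-your-data Streamlit app.

Adds user data (raw text, an uploaded PDF, or a YouTube video's audio
transcribed with Whisper), indexes it in a Chroma vector store, and answers
questions with a LangChain retrieval-augmented QA chain.
Requires the OPENAI_API_KEY environment variable.
"""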
import os
import tempfile

import streamlit as st
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders.blob_loaders.youtube_audio import YoutubeAudioLoader
from langchain_community.document_loaders.generic import GenericLoader
from langchain_community.document_loaders.parsers import OpenAIWhisperParser
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import OpenAIEmbeddings
from langchain_openai.chat_models import ChatOpenAI

st.set_page_config(page_title="Chat with your data", page_icon="🤖")
st.title("Chat with your data")

st.header("Add your data for RAG")
data_type = st.radio("Choose the type of data to add:", ("Text", "PDF", "YouTube URL"))

# Split source data into ~1000-character chunks with 150-character overlap so
# each embedded chunk stays small enough for focused retrieval.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
pages = None
if data_type == "Text":
    user_text = st.text_area("Enter text data")
    if st.button("Add"):
        pages = user_text
elif data_type == "PDF":
    uploaded_pdf = st.file_uploader("Upload PDF", type="pdf")
    if st.button("Add") and uploaded_pdf is not None:
        # Persist the upload to disk so PyPDFLoader can read it by path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            tmp.write(uploaded_pdf.read())
        loader = PyPDFLoader(tmp.name)
        pages = loader.load()
elif data_type == "YouTube URL":
    youtube_url = st.text_input("Enter YouTube URL")
    if st.button("Add"):
        # Download the audio track, then transcribe it with OpenAI Whisper.
        save_dir = "docs/youtube"
        loader = GenericLoader(
            YoutubeAudioLoader([youtube_url], save_dir),
            OpenAIWhisperParser(),
        )
        pages = loader.load()

# A low temperature keeps answers close to the retrieved context.
llm = ChatOpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
    temperature=0.2,
    model="gpt-3.5-turbo",
)
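
# RAG prompt: retrieved chunks fill {context}; the user's question fills {question}.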
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible.
Context: {context}
Question: {question}
Helpful Answer:"""
prompt = ChatPromptTemplate.from_template(template)
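
# Once data has been added, embed it, index it in Chroma, and run a retrieval QA chain over it.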
if pages:
    embedding = OpenAIEmbeddings()
    # Plain text arrives as a single string; the PDF and YouTube loaders return
    # Document objects, so each path uses the matching splitter API.
    if data_type == "Text":
        texts = text_splitter.split_text(pages)
        vectordb = Chroma.from_texts(
            texts=texts,
            embedding=embedding,
            persist_directory="docs/chroma/",
        )
    else:
        docs = text_splitter.split_documents(pages)
        vectordb = Chroma.from_documents(
            documents=docs,
            embedding=embedding,
            persist_directory="docs/chroma/",
        )
    qa_chain = RetrievalQA.from_chain_type(
        llm,
        retriever=vectordb.as_retriever(),
        return_source_documents=True,
        chain_type_kwargs={"prompt": prompt},
    )
    # Hard-coded demo query; the commented-out block below sketches a real chat input.
    result = qa_chain.invoke({"query": "What is BSM Labs"})
    st.write(result["result"])
# Planned interactive chat flow (not yet wired up):
# st.session_state.retriever = vectordb.as_retriever()
# if "retriever" in st.session_state:
#     user_query = st.chat_input("Ask a question")
#     if user_query:
#         docs = st.session_state.retriever.get_relevant_documents(user_query)
#         context = "\n\n".join(doc.page_content for doc in docs)
#         chain = prompt | llm | StrOutputParser()  # needs: from langchain_core.output_parsers import StrOutputParser
#         response = chain.invoke({"context": context, "question": user_query})
#         st.write(response)