# Spaces: Sleeping  (appears to be page-status text captured along with the source; not part of the program)
import hmac
import os
import pprint
from tempfile import NamedTemporaryFile

import google.generativeai as palm
import numpy as np
import pandas as pd
import streamlit as st
from dotenv import load_dotenv, find_dotenv
from langchain.agents import create_pandas_dataframe_agent
# from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
from langchain.chains import RetrievalQA
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import UnstructuredPDFLoader  # load pdf
from langchain.document_loaders import UnstructuredURLLoader  # load urls into document-loader
from langchain.embeddings import GooglePalmEmbeddings
from langchain.indexes import VectorstoreIndexCreator  # vectorize db index with chromadb
from langchain.llms import GooglePalm
from langchain.text_splitter import CharacterTextSplitter  # text splitter
# --- Password check -----------------------------------------------------------
# Access is granted only when the first `pwd` query parameter equals the `PSWD`
# secret. Any ordinary failure while looking either value up keeps the gate
# closed (fail closed), matching the original best-effort behaviour.
isPswdValid = False
try:
    # Query params are indexed by key, then by position; take the first `pwd`.
    pswdVal = st.experimental_get_query_params()['pwd'][0]
    expectedPswd = st.secrets["PSWD"]
except Exception:
    # `except Exception` instead of a bare `except:` so that BaseException
    # subclasses (KeyboardInterrupt, SystemExit, ...) are no longer swallowed.
    pass
else:
    # Constant-time comparison; encode to bytes so non-ASCII values are accepted.
    isPswdValid = hmac.compare_digest(
        str(pswdVal).encode("utf-8"),
        str(expectedPswd).encode("utf-8"),
    )
# --- Password gate and data-source picker -------------------------------------
# Guard clause: tell unauthenticated visitors the password is wrong and halt the
# script here, so the rest of the app can live at top level instead of being
# nested under an `else:` branch.
if not isPswdValid:
    st.write("Invalid Password")
    st.stop()

# Labels of the selectable data sources. Later code identifies the chosen option
# by comparing against positions in this list, so the order matters.
radioButtonList = [
    "E-commerce CSV (https://www.kaggle.com/datasets/mervemenekse/ecommerce-dataset)",
    "Upload my own CSV",
    "Upload my own PDF",
    "URL Chat with Google's Latest Earnings (https://abc.xyz/investor/)",
    "Enter my own URL",
]

# Add some designs to the radio buttons
st.markdown("""
<style>
.stRadio {
padding: 10px;
border-radius: 5px;
background-color: #f5f5f5;
}
.stRadio input[type="radio"] {
position: absolute;
opacity: 0;
cursor: pointer;
}
.stRadio label {
display: flex;
justify-content: center;
align-items: center;
cursor: pointer;
font-size: 16px;
color: #333;
}
.stRadio label:hover {
color: #000;
}
.stRadio.st-selected input[type="radio"] ~ label {
color: #000;
background-color: #d9d9d9;
}
</style>
""", unsafe_allow_html=True)

# The first option is pre-selected (index=0).
genre = st.radio(
    "Tired of reading your files? Chat with it using AI! Choose dataset to finetune",
    radioButtonList,
    index=0,
)
# --- Language model setup -----------------------------------------------------
# Load any local .env file, then take the PaLM key from the Streamlit secrets
# and hand it to both the environment and the google.generativeai client.
load_dotenv(find_dotenv())

api_key = st.secrets["PALM_API_KEY"]
os.environ["GOOGLE_API_KEY"] = st.secrets["PALM_API_KEY"]
palm.configure(api_key=api_key)

# LLM instance used by the code below; its temperature is set to 0.1.
llm = GooglePalm()
llm.temperature = 0.1
# --- Per-option setup ---------------------------------------------------------
# For the selected data source, record which kind of content it is
# ("CSV" / "PDF" / "URL") and a sample question to show as a placeholder.
# The two built-in sources are also loaded right here.
pdfCSVURLText = ""

if genre == radioButtonList[0]:
    # Bundled e-commerce CSV, read from the working directory.
    pdfCSVURLText, exampleQuestion = (
        "CSV",
        "Question1: What was the most sold item? Question2: What was the most common payment?",
    )
    dataDF = pd.read_csv('EcommerceDataset.csv', encoding='unicode_escape')

elif genre == radioButtonList[1]:
    # User-supplied CSV.
    pdfCSVURLText, exampleQuestion = "CSV", "What are the data columns?"

elif genre == radioButtonList[2]:
    # User-supplied PDF.
    pdfCSVURLText, exampleQuestion = "PDF", "Can you summarize the contents?"

elif genre == radioButtonList[3]:
    # Fixed investor-relations URL: build the retrieval chain immediately.
    pdfCSVURLText, exampleQuestion = "URL", "What is Google's latest earnings?"
    urls = ['https://abc.xyz/investor/']
    loader = [UnstructuredURLLoader(urls=urls)]
    index_creator = VectorstoreIndexCreator(
        embedding=GooglePalmEmbeddings(),
        text_splitter=CharacterTextSplitter(chunk_size=1000, chunk_overlap=0),
    )
    index = index_creator.from_loaders(loader)
    chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=index.vectorstore.as_retriever(),
        input_key="question",
    )

elif genre == radioButtonList[4]:
    # User-supplied URL.
    pdfCSVURLText, exampleQuestion = "URL", "Can you summarize the contents?"
# --- URL / file inputs --------------------------------------------------------
# Both widgets are always rendered; each is disabled unless the selected option
# is one that uses it.
isCustomURL = genre == radioButtonList[4]
urlInput = st.text_input(
    'Enter your own URL',
    '',
    placeholder="Type your URL here (e.g. https://abc.xyz/investor/)",
    disabled=not isCustomURL,
)

isCustomPDF = genre == radioButtonList[1] or genre == radioButtonList[2]
uploaded_file = st.file_uploader(
    f"Upload your own {pdfCSVURLText} here",
    type=pdfCSVURLText.lower(),
    disabled=not isCustomPDF,
)

# Stays "" until an upload has been written to disk; afterwards it holds the
# temp file's name, whose suffix records the kind of upload.
uploadedFilename = ""
if uploaded_file is not None:
    # The loaders below take a file path, so the upload is spilled to a temp
    # file. NamedTemporaryFile removes the file when the `with` block exits, so
    # everything that reads it must run inside the block.
    with NamedTemporaryFile(dir='.', suffix=f'.{pdfCSVURLText.lower()}') as f:
        f.write(uploaded_file.getbuffer())
        # Push buffered bytes to disk before the file is re-opened by name;
        # without this the readers below may see an empty or truncated file.
        f.flush()
        uploadedFilename = f.name

        if genre == radioButtonList[1]:  # Custom CSV Upload
            dataDF = pd.read_csv(uploadedFilename, encoding='unicode_escape')
        elif genre == radioButtonList[2]:  # Custom PDF Upload
            pdf_loaders = [UnstructuredPDFLoader(uploadedFilename)]
            pdf_index = VectorstoreIndexCreator(
                embedding=GooglePalmEmbeddings(),
                text_splitter=CharacterTextSplitter(chunk_size=1000, chunk_overlap=0),
            ).from_loaders(pdf_loaders)
            pdf_chain = RetrievalQA.from_chain_type(
                llm=llm,
                chain_type="stuff",
                retriever=pdf_index.vectorstore.as_retriever(),
                input_key="question",
            )
# --- Chat box -----------------------------------------------------------------
# The question box is usable straight away for the bundled CSV and for both URL
# options; the upload options require an uploaded file with the matching suffix.
if genre in (radioButtonList[0], radioButtonList[3], radioButtonList[4]):
    enableChatBox = True
elif genre == radioButtonList[1]:  # Custom CSV Upload
    enableChatBox = uploadedFilename.endswith(".csv")
elif genre == radioButtonList[2]:  # Custom PDF Upload
    enableChatBox = uploadedFilename.endswith(".pdf")
else:
    enableChatBox = False

chatTextStr = st.text_input(
    f'Ask me anything about this {pdfCSVURLText}',
    '',
    placeholder=f"Type here (e.g. {exampleQuestion})",
    disabled=not enableChatBox,
)
# --- Answering ----------------------------------------------------------------
chatWithPDFButton = "CLICK HERE TO START CHATTING"

# The button follows the chat box: disabled whenever the box is disabled.
# The earlier condition (`not enableChatBox and not chatTextStr`) left the
# button clickable when the box was enabled but empty, and also when the box
# was disabled but still held text, in which case the data/chain objects used
# below would not exist.
if st.button(chatWithPDFButton, disabled=not enableChatBox):  # Button clicked
    if not chatTextStr.strip():
        # Nothing to ask: do not send an empty prompt to the model.
        st.warning("Please type a question first.")

    elif genre == radioButtonList[0] or genre == radioButtonList[1]:
        # Bundled or uploaded CSV: answer through a pandas dataframe agent.
        agent = create_pandas_dataframe_agent(llm, dataDF, verbose=False)
        st.write(agent.run(chatTextStr))

    elif genre == radioButtonList[2]:  # Custom PDF Upload
        st.write(pdf_chain.run(chatTextStr))

    elif genre == radioButtonList[3]:  # Google Alphabet URL Earnings Report
        st.write(chain.run(chatTextStr))

    elif genre == radioButtonList[4]:  # Custom URL
        customURL = urlInput.strip()
        if not customURL:
            # No URL entered yet: there is nothing to load or index.
            st.warning("Please enter a URL first.")
        else:
            # The custom URL is only known now, so its index and retrieval
            # chain are built at click time.
            urls = [customURL]
            loader = [UnstructuredURLLoader(urls=urls)]
            index = VectorstoreIndexCreator(
                embedding=GooglePalmEmbeddings(),
                text_splitter=CharacterTextSplitter(chunk_size=1000, chunk_overlap=0),
            ).from_loaders(loader)
            chain = RetrievalQA.from_chain_type(
                llm=llm,
                chain_type="stuff",
                retriever=index.vectorstore.as_retriever(),
                input_key="question",
            )
            st.write(chain.run(chatTextStr))