"""Streamlit app: password-gated Q&A over a CSV, a PDF or a web page using Google PaLM.

The user picks a data source with a radio button, optionally uploads a file or
enters a URL, types a question and presses the button. CSV sources are queried
through a LangChain pandas-dataframe agent; PDF and URL sources are queried
through a RetrievalQA chain built on a vector-store index.
"""
import os
import pprint
from tempfile import NamedTemporaryFile

import google.generativeai as palm
import numpy as np
import pandas as pd
import streamlit as st
from dotenv import load_dotenv, find_dotenv
from langchain.agents import create_pandas_dataframe_agent
from langchain.chains import RetrievalQA
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import UnstructuredPDFLoader  # load pdf
from langchain.document_loaders import UnstructuredURLLoader  # load urls into document-loader
from langchain.embeddings import GooglePalmEmbeddings
from langchain.indexes import VectorstoreIndexCreator  # vectorize db index with chromadb
from langchain.llms import GooglePalm
from langchain.text_splitter import CharacterTextSplitter  # text splitter

# Page indexed by the built-in "Google's Latest Earnings" option.
GOOGLE_EARNINGS_URL = 'https://abc.xyz/investor/'


def _password_is_valid():
    """Return True when the ``pwd`` query parameter equals the ``PSWD`` secret.

    Any failure to read either value (missing query parameter, missing
    secret, ...) is treated as a failed check rather than an error.
    """
    try:
        return st.experimental_get_query_params()['pwd'][0] == st.secrets["PSWD"]
    except Exception:
        # Fail closed. `Exception` (not a bare `except`) so that
        # KeyboardInterrupt / SystemExit are not swallowed.
        return False


def _build_retrieval_chain(llm, loaders):
    """Index the documents produced by ``loaders`` and return a RetrievalQA chain.

    Args:
        llm: language model that answers the questions.
        loaders: list of LangChain document loaders to index.

    Returns:
        A "stuff" RetrievalQA chain whose input key is ``"question"``.
    """
    index = VectorstoreIndexCreator(
        embedding=GooglePalmEmbeddings(),
        text_splitter=CharacterTextSplitter(chunk_size=1000, chunk_overlap=0),
    ).from_loaders(loaders)
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=index.vectorstore.as_retriever(),
        input_key="question",
    )


isPswdValid = _password_is_valid()

if not isPswdValid:
    st.write("Invalid Password")
else:
    radioButtonList = ["E-commerce CSV (https://www.kaggle.com/datasets/mervemenekse/ecommerce-dataset)",
                       "Upload my own CSV",
                       "Upload my own PDF",
                       "URL Chat with Google's Latest Earnings (https://abc.xyz/investor/)",
                       "Enter my own URL"]

    # Add some designs to the radio buttons (the markup is currently blank).
    st.markdown(""" """, unsafe_allow_html=True)

    genre = st.radio(
        "Choose dataset to finetune", radioButtonList, index=0
    )

    # Initialize language model
    load_dotenv(find_dotenv())  # read local .env file
    api_key = st.secrets["PALM_API_KEY"]
    os.environ["GOOGLE_API_KEY"] = api_key
    palm.configure(api_key=api_key)
    llm = GooglePalm()
    llm.temperature = 0.1

    # Per-source label ("CSV" / "PDF" / "URL") and example question for the UI.
    pdfCSVURLText = ""
    if genre == radioButtonList[0]:
        pdfCSVURLText = "CSV"
        exampleQuestion = "Question1: What was the most sold item? Question2: What was the most common payment?"
        dataDF = pd.read_csv('EcommerceDataset.csv', encoding='unicode_escape')
    elif genre == radioButtonList[1]:
        pdfCSVURLText = "CSV"
        exampleQuestion = "What are the data columns?"
    elif genre == radioButtonList[2]:
        pdfCSVURLText = "PDF"
        exampleQuestion = "Can you summarize the contents?"
    elif genre == radioButtonList[3]:
        pdfCSVURLText = "URL"
        exampleQuestion = "What is Google's latest earnings?"
        # The index for this page is built when the button is pressed, not
        # here, so the page is not re-fetched and re-embedded on every rerun.
    elif genre == radioButtonList[4]:
        pdfCSVURLText = "URL"
        exampleQuestion = "Can you summarize the contents?"

    isCustomURL = genre == radioButtonList[4]
    urlInput = st.text_input('Enter your own URL', '',
                             placeholder="Type your URL here (e.g. https://abc.xyz/investor/)",
                             disabled=not isCustomURL)

    isCustomPDF = genre == radioButtonList[1] or genre == radioButtonList[2]
    uploaded_file = st.file_uploader(f"Upload your own {pdfCSVURLText} here",
                                     type=pdfCSVURLText.lower(),
                                     disabled=not isCustomPDF)
    uploadedFilename = ""
    if uploaded_file is not None:
        # The temporary file is deleted when the `with` block exits, so
        # everything that reads it by name must run inside the block.
        with NamedTemporaryFile(dir='.', suffix=f'.{pdfCSVURLText.lower()}') as f:
            f.write(uploaded_file.getbuffer())
            # Push the buffered bytes to disk before the file is re-opened by
            # name below; otherwise the readers may see a truncated file.
            f.flush()
            uploadedFilename = f.name
            if genre == radioButtonList[1]:  # Custom CSV Upload
                dataDF = pd.read_csv(uploadedFilename, encoding='unicode_escape')
            elif genre == radioButtonList[2]:  # Custom PDF Upload
                pdf_chain = _build_retrieval_chain(llm, [UnstructuredPDFLoader(uploadedFilename)])

    # The chat is usable only once the selected source is actually available.
    if genre == radioButtonList[1]:  # Custom CSV Upload
        enableChatBox = uploadedFilename.endswith(".csv")
    elif genre == radioButtonList[2]:  # Custom PDF Upload
        enableChatBox = uploadedFilename.endswith(".pdf")
    elif genre == radioButtonList[4]:  # Custom URL
        enableChatBox = bool(urlInput.strip())
    else:  # E-commerce CSV or Google Alphabet URL Earnings Report
        enableChatBox = True

    chatTextStr = st.text_input(f'Ask me anything about this {pdfCSVURLText}', '',
                                placeholder=f"Type here (e.g. {exampleQuestion})",
                                disabled=not enableChatBox)
    chatWithPDFButton = "CLICK HERE TO START CHATTING"
    # Disabled whenever the source is not ready: the branches below rely on
    # `dataDF` / `pdf_chain`, which only exist once a source has been loaded.
    if st.button(chatWithPDFButton, disabled=not enableChatBox):
        if not chatTextStr.strip():
            st.warning("Please type a question first.")
        elif genre == radioButtonList[0] or genre == radioButtonList[1]:
            # E-commerce CSV or Custom CSV Upload: query the dataframe via an agent.
            agent = create_pandas_dataframe_agent(llm, dataDF, verbose=False)
            st.write(agent.run(chatTextStr))
        elif genre == radioButtonList[2]:  # Custom PDF Upload
            st.write(pdf_chain.run(chatTextStr))
        else:
            # Google Alphabet URL Earnings Report or Custom URL.
            if genre == radioButtonList[3]:
                urls = [GOOGLE_EARNINGS_URL]
            else:
                urls = [urlInput.strip()]
            chain = _build_retrieval_chain(llm, [UnstructuredURLLoader(urls=urls)])
            st.write(chain.run(chatTextStr))