Spaces:
Sleeping
Sleeping
File size: 5,956 Bytes
7d849d3 413cb20 9e4c9f3 7d849d3 9830b8e 980beb7 4b4013f b16e227 81b851a 9830b8e b16e227 7888633 874fc5b 63e083c 874fc5b 7888633 874fc5b 980beb7 bf505c6 874fc5b 6a1c9b8 390cad0 4b4013f 413cb20 6cbfbad e4b3526 4b4013f 6cbfbad 413cb20 e4b3526 674ea12 ef1eb58 4b4013f 005a493 7888633 413cb20 7888633 e4b3526 7888633 e4b3526 7888633 413cb20 7888633 8060e77 874fc5b 674ea12 874fc5b 8060e77 7888633 674ea12 7888633 ef1eb58 7888633 95e937e 7888633 390cad0 1504d7b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
import streamlit as st
from tempfile import NamedTemporaryFile
import pprint
import google.generativeai as palm
import os
from dotenv import load_dotenv, find_dotenv
from langchain.embeddings import GooglePalmEmbeddings
from langchain.llms import GooglePalm
from langchain.document_loaders import UnstructuredURLLoader #load urls into docoument-loader
from langchain.chains.question_answering import load_qa_chain
from langchain.indexes import VectorstoreIndexCreator #vectorize db index with chromadb
from langchain.text_splitter import CharacterTextSplitter #text splitter
from langchain.chains import RetrievalQA
from langchain.document_loaders import UnstructuredPDFLoader #load pdf
from langchain.agents import create_pandas_dataframe_agent
import pandas as pd
import numpy as np
import pprint
# Data sources the user can chat with. Later code compares `genre` against
# these entries by position (radioButtonList[0] .. radioButtonList[4]), so
# the order of the list must not change.
radioButtonList = [
    "E-commerce CSV (https://www.kaggle.com/datasets/mervemenekse/ecommerce-dataset)",
    "Upload my own CSV",
    "Upload my own PDF",
    "URL Chat with Google Latest Earnings (https://abc.xyz/investor/)",
    "Enter my own URL",
]

# Currently selected data source; the first entry is preselected.
genre = st.radio(label="Choose dataset to finetune", options=radioButtonList, index=0)
# Initialize the language model shared by every chat mode below.
load_dotenv(find_dotenv())  # load variables from a local .env file, if one is found

# The PaLM key is read from Streamlit secrets (not hard-coded here).
api_key = st.secrets["PALM_API_KEY"]
# Also exported as GOOGLE_API_KEY -- presumably what the LangChain PaLM
# wrappers look for; confirm against the installed langchain version.
os.environ["GOOGLE_API_KEY"] = api_key
palm.configure(api_key=api_key)

llm = GooglePalm()
llm.temperature = 0.1  # sampling temperature is set on the instance after construction
# Short label for the kind of source behind each radio option ("CSV", "PDF"
# or "URL"). It is reused below in widget captions and, lower-cased, as the
# accepted upload extension. Falls back to "" for an unknown selection.
sourceKindByChoice = dict(zip(radioButtonList, ("CSV", "CSV", "PDF", "URL", "URL")))
pdfCSVURLText = sourceKindByChoice.get(genre, "")

# Only the two preset sources can be prepared right away; the "own CSV/PDF/URL"
# options have to wait for user input further down.
if genre == radioButtonList[0]:
    # Bundled e-commerce dataset, read relative to the working directory.
    dataDF = pd.read_csv('EcommerceDataset.csv', encoding='unicode_escape')
elif genre == radioButtonList[3]:
    # Preset investor-relations page: load it, index it with PaLM embeddings
    # and wrap the index in a retrieval QA chain used by the button handler.
    urls = ['https://abc.xyz/investor/']
    loader = [UnstructuredURLLoader(urls=urls)]
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    index = VectorstoreIndexCreator(
        embedding=GooglePalmEmbeddings(),
        text_splitter=splitter,
    ).from_loaders(loader)
    chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=index.vectorstore.as_retriever(),
        input_key="question",
    )
# The URL box is only editable when "Enter my own URL" is selected.
isCustomURL = genre == radioButtonList[4]
urlInput = st.text_input('Enter your own URL', '', placeholder="Type your URL here (e.g. https://abc.xyz/investor/)", disabled=not isCustomURL)

# The uploader is only active for the two "upload my own" options
# (despite its name, this flag covers both the CSV and the PDF upload).
isCustomPDF = genre == radioButtonList[1] or genre == radioButtonList[2]
uploaded_file = st.file_uploader(f"Upload your own {pdfCSVURLText} here", type=pdfCSVURLText.lower(), disabled=not isCustomPDF)

# Name of the temporary copy of the upload; stays "" when nothing was
# uploaded. Later code inspects its suffix to decide whether chat is enabled.
uploadedFilename = ""
if uploaded_file is not None:
    # pandas / the PDF loader take a filesystem path, so the in-memory upload
    # is copied to a temporary file in the working directory first.
    with NamedTemporaryFile(dir='.', suffix=f'.{pdfCSVURLText.lower()}') as f:
        f.write(uploaded_file.getbuffer())
        # The file is reopened by name below: flush first, otherwise the
        # readers may see a truncated or empty file.
        f.flush()
        uploadedFilename = f.name

        # All reads must happen inside the `with` block -- the temporary file
        # is deleted as soon as it is closed.
        if genre == radioButtonList[1]:  # Custom CSV Upload
            dataDF = pd.read_csv(uploadedFilename, encoding='unicode_escape')
        elif genre == radioButtonList[2]:  # Custom PDF Upload
            pdf_loaders = [UnstructuredPDFLoader(uploadedFilename)]
            pdf_index = VectorstoreIndexCreator(
                embedding=GooglePalmEmbeddings(),
                text_splitter=CharacterTextSplitter(chunk_size=1000, chunk_overlap=0),
            ).from_loaders(pdf_loaders)
            pdf_chain = RetrievalQA.from_chain_type(
                llm=llm,
                chain_type="stuff",
                retriever=pdf_index.vectorstore.as_retriever(),
                input_key="question",
            )
# Decide whether the question box may be used: chatting only makes sense once
# the selected source is actually available.
enableChatBox = False
if genre in (radioButtonList[0], radioButtonList[3]):
    # Preset sources (bundled CSV, preset earnings URL) need no user input.
    enableChatBox = True
elif genre == radioButtonList[1]:  # Custom CSV Upload
    enableChatBox = uploadedFilename.endswith(".csv")
elif genre == radioButtonList[2]:  # Custom PDF Upload
    enableChatBox = uploadedFilename.endswith(".pdf")
elif genre == radioButtonList[4]:  # Custom URL
    # Require a non-blank URL; an empty one would otherwise be handed to the
    # URL loader when the button is clicked.
    enableChatBox = bool(urlInput.strip())

chatTextStr = st.text_input(f'Ask me anything about this {pdfCSVURLText}', '', placeholder="Type here (e.g. Question1: What was the most sold item? Question2: What was the most common payment?)", disabled=not enableChatBox)
chatWithPDFButton = "CLICK HERE TO START CHATTING"
# Answer the question against the selected source when the button is clicked.
# The button is disabled whenever no usable source is loaded. The previous
# condition (`not enableChatBox and not chatTextStr`) left it clickable when
# stale question text was present, which then failed on the missing
# dataframe / chain.
if st.button(chatWithPDFButton, disabled=not enableChatBox):  # Button clicked
    if not chatTextStr.strip():
        # Nothing to ask -- avoid sending an empty prompt to the model.
        st.warning("Please type a question first.")
    elif genre in (radioButtonList[0], radioButtonList[1]):  # Bundled or uploaded CSV
        # Both CSV modes query the dataframe through a pandas agent.
        agent = create_pandas_dataframe_agent(llm, dataDF, verbose=False)
        answer = agent.run(chatTextStr)
        st.write(answer)
    elif genre == radioButtonList[2]:  # Custom PDF Upload
        pdf_answer = pdf_chain.run(chatTextStr)
        st.write(pdf_answer)
    elif genre == radioButtonList[3]:  # Google Alphabet URL Earnings Report
        answer = chain.run(chatTextStr)
        st.write(answer)
    elif genre == radioButtonList[4]:  # Custom URL
        if not urlInput.strip():
            st.warning("Please enter a URL first.")
        else:
            # The custom URL is only known now, so its index and QA chain are
            # built here, at click time.
            urls = [urlInput]
            loader = [UnstructuredURLLoader(urls=urls)]
            index = VectorstoreIndexCreator(
                embedding=GooglePalmEmbeddings(),
                text_splitter=CharacterTextSplitter(chunk_size=1000, chunk_overlap=0),
            ).from_loaders(loader)
            chain = RetrievalQA.from_chain_type(
                llm=llm,
                chain_type="stuff",
                retriever=index.vectorstore.as_retriever(),
                input_key="question",
            )
            answer = chain.run(chatTextStr)
            st.write(answer)
|