File size: 7,680 Bytes
7d849d3
413cb20
9e4c9f3
 
 
 
 
 
 
 
 
 
 
 
 
 
161c0d8
 
9e4c9f3
 
 
 
7d849d3
33476c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f1260bc
33476c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
390cad0
 
 
 
 
 
 
 
 
33476c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
import streamlit as st
from tempfile import NamedTemporaryFile

import pprint
import google.generativeai as palm
import os
from dotenv import load_dotenv, find_dotenv
from langchain.embeddings import GooglePalmEmbeddings
from langchain.llms import GooglePalm

from langchain.document_loaders import UnstructuredURLLoader  # load URLs into a document loader
from langchain.chains.question_answering import load_qa_chain
from langchain.indexes import VectorstoreIndexCreator #vectorize db index with chromadb
from langchain.text_splitter import CharacterTextSplitter #text splitter
from langchain.chains import RetrievalQA
from langchain.document_loaders import UnstructuredPDFLoader  #load pdf
from langchain.agents import create_pandas_dataframe_agent
# from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent

import pandas as pd
import numpy as np
import pprint

import hmac  # imported here so this block is self-contained; used for constant-time comparison

# Gate the whole app behind a shared password supplied as the `pwd` query
# parameter (e.g. `?pwd=...`) and checked against the `PSWD` entry in st.secrets.
# Any lookup failure leaves the gate closed.
isPswdValid = False
try:
    pswdVal = st.experimental_get_query_params()['pwd'][0]
    expectedPswd = st.secrets["PSWD"]
except (KeyError, IndexError, FileNotFoundError):
    # KeyError/IndexError: `pwd` query parameter (or the PSWD secret) is absent.
    # FileNotFoundError: presumably raised by st.secrets when no secrets file
    # exists -- TODO confirm against the deployed Streamlit version.
    pass
else:
    # Compare as bytes: compare_digest rejects non-ASCII str arguments, and a
    # constant-time comparison avoids leaking how many leading characters matched.
    # str() also lets a non-string secret (e.g. a numeric TOML value) match its text form.
    isPswdValid = hmac.compare_digest(
        pswdVal.encode("utf-8"), str(expectedPswd).encode("utf-8")
    )

def _build_retrieval_chain(loaders, llm):
    """Index the documents produced by *loaders* and return a QA chain over them.

    The documents are split into 1000-character chunks with no overlap,
    embedded with ``GooglePalmEmbeddings`` and indexed through
    ``VectorstoreIndexCreator``. The returned ``RetrievalQA`` chain uses the
    "stuff" chain type and takes the user's text under the ``"question"`` key.

    Args:
        loaders: list of langchain document loaders (URL or PDF loaders here).
        llm: the language model used to answer from the retrieved chunks.

    Returns:
        A ``RetrievalQA`` chain; call ``.run(question)`` to get an answer.
    """
    index = VectorstoreIndexCreator(
        embedding=GooglePalmEmbeddings(),
        text_splitter=CharacterTextSplitter(chunk_size=1000, chunk_overlap=0),
    ).from_loaders(loaders)
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=index.vectorstore.as_retriever(),
        input_key="question",
    )


if not isPswdValid:
    st.write("Invalid Password")
else:
    radioButtonList = [
        "E-commerce CSV (https://www.kaggle.com/datasets/mervemenekse/ecommerce-dataset)",
        "Upload my own CSV",
        "Upload my own PDF",
        "URL Chat with Google's Latest Earnings (https://abc.xyz/investor/)",
        "Enter my own URL",
    ]
    # Named aliases for the five options so the branches below read by intent
    # instead of by list position.
    optEcommerceCSV, optCustomCSV, optCustomPDF, optGoogleURL, optCustomURL = radioButtonList

    # Add some designs to the radio buttons
    st.markdown("""
    <style>
    .stRadio {
      padding: 10px;
      border-radius: 5px;
      background-color: #f5f5f5;
    }

    .stRadio input[type="radio"] {
      position: absolute;
      opacity: 0;
      cursor: pointer;
    }

    .stRadio label {
      display: flex;
      justify-content: center;
      align-items: center;
      cursor: pointer;
      font-size: 16px;
      color: #333;
    }

    .stRadio label:hover {
      color: #000;
    }

    .stRadio.st-selected input[type="radio"] ~ label {
      color: #000;
      background-color: #d9d9d9;
    }
    </style>
    """, unsafe_allow_html=True)

    genre = st.radio(
        "Tired of reading your files? Chat with it using AI! Choose dataset to finetune", radioButtonList, index=0
    )

    # Initialize language model
    load_dotenv(find_dotenv()) # read local .env file
    api_key = st.secrets["PALM_API_KEY"] # put your API key here
    os.environ["GOOGLE_API_KEY"] = api_key
    palm.configure(api_key=api_key)
    llm = GooglePalm()
    llm.temperature = 0.1

    # Per-option setup: the kind of source ("CSV" / "PDF" / "URL") drives the
    # widget labels below, and the built-in sources are loaded right away.
    pdfCSVURLText = ""
    if genre == optEcommerceCSV:
        pdfCSVURLText = "CSV"
        exampleQuestion = "Question1: What was the most sold item? Question2: What was the most common payment?"
        dataDF = pd.read_csv('EcommerceDataset.csv', encoding='unicode_escape')
    elif genre == optCustomCSV:
        pdfCSVURLText = "CSV"
        exampleQuestion = "What are the data columns?"
    elif genre == optCustomPDF:
        pdfCSVURLText = "PDF"
        exampleQuestion = "Can you summarize the contents?"
    elif genre == optGoogleURL:
        pdfCSVURLText = "URL"
        exampleQuestion = "What is Google's latest earnings?"
        chain = _build_retrieval_chain(
            [UnstructuredURLLoader(urls=['https://abc.xyz/investor/'])], llm
        )
    elif genre == optCustomURL:
        pdfCSVURLText = "URL"
        exampleQuestion = "Can you summarize the contents?"

    # The URL box is only editable for the "Enter my own URL" option.
    isCustomURL = genre == optCustomURL
    urlInput = st.text_input('Enter your own URL', '', placeholder="Type your URL here (e.g. https://abc.xyz/investor/)", disabled=not isCustomURL)

    # The uploader is only active for the two "Upload my own ..." options.
    isCustomPDF = genre in (optCustomCSV, optCustomPDF)
    uploaded_file = st.file_uploader(f"Upload your own {pdfCSVURLText} here", type=pdfCSVURLText.lower(), disabled=not isCustomPDF)
    uploadedFilename = ""
    if uploaded_file is not None:
        # The loaders below re-open the upload by path, so it is spooled to a
        # temporary file that is removed again when the `with` block exits.
        with NamedTemporaryFile(dir='.', suffix=f'.{pdfCSVURLText.lower()}') as f:
            f.write(uploaded_file.getbuffer())
            # Flush before anything re-opens the file by name; otherwise a small
            # upload can still sit in the write buffer and be read back as
            # empty or truncated.
            f.flush()
            uploadedFilename = f.name
            if genre == optCustomCSV: # Custom CSV Upload
                dataDF = pd.read_csv(uploadedFilename, encoding='unicode_escape')
            elif genre == optCustomPDF: # Custom PDF Upload
                pdf_chain = _build_retrieval_chain(
                    [UnstructuredPDFLoader(uploadedFilename)], llm
                )

    # The chat box is usable only once the selected source is actually
    # available: built-in sources always, uploads once a file of the right
    # type has been processed, custom URL once a URL has been entered.
    if genre in (optEcommerceCSV, optGoogleURL):
        enableChatBox = True
    elif genre == optCustomCSV:
        enableChatBox = uploadedFilename.endswith(".csv")
    elif genre == optCustomPDF:
        enableChatBox = uploadedFilename.endswith(".pdf")
    elif genre == optCustomURL:
        enableChatBox = bool(urlInput.strip())
    else:
        enableChatBox = False

    chatTextStr = st.text_input(f'Ask me anything about this {pdfCSVURLText}', '', placeholder=f"Type here (e.g. {exampleQuestion})", disabled=not enableChatBox)
    chatWithPDFButton = "CLICK HERE TO START CHATTING"
    # The button follows the chat box state. An empty question is rejected
    # after the click rather than by disabling the button, because the text
    # input only reports its value once it has been committed.
    if st.button(chatWithPDFButton, disabled=not enableChatBox): # Button clicked
        if not chatTextStr.strip():
            st.warning("Please type a question first.")

        elif genre in (optEcommerceCSV, optCustomCSV): # E-commerce CSV / Custom CSV Upload
            # NOTE(review): the pandas agent is understood to execute
            # model-generated Python against the DataFrame; uploaded CSVs and
            # questions are untrusted input -- confirm this is acceptable for
            # the deployment.
            agent = create_pandas_dataframe_agent(llm, dataDF, verbose=False)
            answer = agent.run(chatTextStr)
            st.write(answer)

        elif genre == optCustomPDF: # Custom PDF Upload
            pdf_answer = pdf_chain.run(chatTextStr)
            st.write(pdf_answer)

        elif genre == optGoogleURL: # Google Alphabet URL Earnings Report
            answer = chain.run(chatTextStr)
            st.write(answer)

        elif genre == optCustomURL: # Custom URL
            # Built on click (not on every rerun) because the URL is user-supplied.
            chain = _build_retrieval_chain(
                [UnstructuredURLLoader(urls=[urlInput.strip()])], llm
            )
            answer = chain.run(chatTextStr)
            st.write(answer)