File size: 6,875 Bytes
bb0cbef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
import streamlit as st
from tempfile import NamedTemporaryFile

import pprint
import os
from dotenv import load_dotenv, find_dotenv
import os
from langchain_openai import ChatOpenAI
from langchain_community.document_loaders import PyPDFLoader
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.document_loaders import WebBaseLoader

import pandas as pd
import numpy as np
import pprint

# Default page loaded for the "Google's Latest Earnings" URL option; also shown
# as the example in the custom-URL input placeholder.
defaultGoogleURL = "https://www.google.com/search?q=google+earnings"
# API key for OpenRouter, read from Streamlit secrets when the script runs.
OPEN_ROUTER_KEY = st.secrets["OPEN_ROUTER_KEY"]
# Model identifier handed to ChatOpenAI together with the OpenRouter base URL.
OPEN_ROUTER_MODEL = "meta-llama/llama-3.1-70b-instruct:free"

def pretty_print_columns(text):
    """Flatten a multi-line column description into a single line.

    Every line of ``text`` is stripped of surrounding whitespace, lines that
    end up empty are discarded, and the survivors are joined with two spaces.

    Args:
        text (str): The input string containing the column descriptions.

    Returns:
        str: The non-blank, stripped lines of ``text`` separated by two spaces.
    """
    stripped_lines = (raw_line.strip() for raw_line in text.splitlines())
    return "  ".join(piece for piece in stripped_lines if piece)

# Optional password gate driven by the ``?pwd=...`` query parameter.
#
# NOTE(review): the flag starts out True and the comparison below can only set
# it to True again, so as written nobody is ever rejected and the
# "Invalid Password" branch further down is unreachable. If the gate is meant
# to be enforced, initialise this to False -- confirm the intent first, since
# that change would lock out every visitor who does not supply the password.
isPswdValid = True
try:
    pswdVal = st.experimental_get_query_params()['pwd'][0]
    if pswdVal==st.secrets["PSWD"]:
        isPswdValid = True
except Exception:
    # Best effort: a missing ``pwd`` parameter, a missing ``PSWD`` secret or an
    # unavailable query-params API leaves the flag untouched. ``Exception`` is
    # caught instead of using a bare ``except`` so that BaseException-derived
    # signals (KeyboardInterrupt, SystemExit, ...) are no longer swallowed.
    pass

# Render either the rejection message or the whole chat UI, depending on the
# password flag.
if not isPswdValid:
    st.write("Invalid Password")
else:
    # Labels of the selectable data sources. The code below identifies the
    # chosen source by comparing against entries of this list by index
    # (0 = bundled CSV, 1 = uploaded CSV, 2 = uploaded PDF, 3 = default URL,
    # 4 = custom URL), so the order of the entries matters.
    radioButtonList = ["E-commerce CSV (https://www.kaggle.com/datasets/mervemenekse/ecommerce-dataset)",
    "Upload my own CSV",
    "Upload my own PDF",
    f"URL Chat with Google's Latest Earnings ({defaultGoogleURL})",
    "Enter my own URL"]

    # Style the radio group with custom CSS: padded, rounded grey container,
    # hidden native radio inputs, centred labels, and darker text on hover and
    # selection. The <style> block is passed to st.markdown with
    # unsafe_allow_html=True.
    st.markdown("""

    <style>

    .stRadio {

      padding: 10px;

      border-radius: 5px;

      background-color: #f5f5f5;

    }



    .stRadio input[type="radio"] {

      position: absolute;

      opacity: 0;

      cursor: pointer;

    }



    .stRadio label {

      display: flex;

      justify-content: center;

      align-items: center;

      cursor: pointer;

      font-size: 16px;

      color: #333;

    }



    .stRadio label:hover {

      color: #000;

    }



    .stRadio.st-selected input[type="radio"] ~ label {

      color: #000;

      background-color: #d9d9d9;

    }

    </style>

    """, unsafe_allow_html=True)

    # Data-source picker; the selected label is matched against
    # radioButtonList entries further down. The first option is preselected.
    genre = st.radio(
        "Tired of reading your files? Chat with it using AI! Choose dataset to finetune",
        radioButtonList,
        index=0,
    )

    # Read a local .env file (if one is found) before building the chat client.
    dotenvPath = find_dotenv()
    load_dotenv(dotenvPath)

    # Chat model client pointed at the OpenRouter endpoint.
    llm = ChatOpenAI(
        model=OPEN_ROUTER_MODEL,
        temperature=0.1,
        openai_api_key=OPEN_ROUTER_KEY,
        openai_api_base="https://openrouter.ai/api/v1",
    )

    # Per-source settings keyed by radio label:
    # (source kind shown in the UI, example question for the placeholder).
    sourceProfiles = {
        radioButtonList[0]: ("CSV", "Question1: What was the most sold item? Question2: What was the most common payment?"),
        radioButtonList[1]: ("CSV", "What are the data columns?"),
        radioButtonList[2]: ("PDF", "Can you summarize the contents?"),
        radioButtonList[3]: ("URL", "What is Google's latest earnings?"),
        radioButtonList[4]: ("URL", "Can you summarize the contents?"),
    }
    pdfCSVURLText = ""
    if genre in sourceProfiles:
        pdfCSVURLText, exampleQuestion = sourceProfiles[genre]

    # The bundled e-commerce dataset is loaded right away; the other sources
    # are loaded later (after an upload, or when the chat button is pressed).
    if genre==radioButtonList[0]:
        loader = CSVLoader(file_path='EcommerceDataset.csv')
        csv_data = loader.load()

    # The URL box is only editable for the "Enter my own URL" option.
    isCustomURL = genre==radioButtonList[4]
    urlInput = st.text_input('Enter your own URL', '', placeholder=f"Type your URL here (e.g. {defaultGoogleURL})", disabled=not isCustomURL)

    # The uploader is only active for the two "upload my own" options and only
    # accepts the extension matching the selected source kind.
    isCustomPDF = genre==radioButtonList[1] or genre==radioButtonList[2]
    uploaded_file = st.file_uploader(f"Upload your own {pdfCSVURLText} here", type=pdfCSVURLText.lower(), disabled=not isCustomPDF)
    uploadedFilename = ""
    if uploaded_file is not None:
        # The loaders take a file path, so the upload is spooled to a temporary
        # file in the working directory; it is removed when the `with` exits.
        with NamedTemporaryFile(dir='.', suffix=f'.{pdfCSVURLText.lower()}') as f:
            f.write(uploaded_file.getbuffer())
            # The loaders reopen the file by name, so the buffered bytes must
            # reach the disk first; without this flush a small upload could be
            # read back as an empty or truncated file.
            f.flush()
            # Kept after the file is gone: only its suffix is inspected later
            # to decide whether the chat box may be enabled.
            uploadedFilename = f.name
            # NOTE(review): reopening a still-open NamedTemporaryFile by name
            # works on Unix but not on Windows -- confirm the deployment target.
            if genre==radioButtonList[1]: # Custom CSV Upload
                loader = CSVLoader(file_path=uploadedFilename)
                csv_data = loader.load()
            elif genre==radioButtonList[2]: # Custom PDF Upload
                loader = PyPDFLoader(uploadedFilename)
                pdf_pages = loader.load_and_split()

    # Decide whether the question box is usable for the selected source:
    # upload-based sources need a spooled file with the matching extension,
    # the bundled CSV and both URL sources are always available.
    if genre==radioButtonList[1]: # Custom CSV Upload
        enableChatBox = uploadedFilename.endswith(".csv")
    elif genre==radioButtonList[2]: # Custom PDF Upload
        enableChatBox = uploadedFilename.endswith(".pdf")
    else:
        alwaysReadySources = (radioButtonList[0], radioButtonList[3], radioButtonList[4])
        enableChatBox = genre in alwaysReadySources

    # Question box; the placeholder shows the example question for the source.
    chatTextStr = st.text_input(
        f'Ask me anything about this {pdfCSVURLText}',
        '',
        placeholder=f"Type here (e.g. {exampleQuestion})",
        disabled=not enableChatBox,
    )
    chatWithPDFButton = "CLICK HERE TO START CHATTING"
    # The button is usable exactly when the chat box is. The previous condition
    # (`not enableChatBox and not chatTextStr`) left the button clickable when
    # the chat box was disabled but still held text, which then failed on data
    # that had never been loaded.
    if st.button(chatWithPDFButton, disabled=not enableChatBox): #  Button clicked
        # Resolve the selected source into the wording used in the prompt and
        # the loaded documents; both stay None when there is nothing to send.
        sourceLabel = None
        documents = None
        if not chatTextStr.strip():
            # Do not spend a model call on an empty question.
            st.warning("Please type a question before starting the chat.")
        elif genre==radioButtonList[0] or genre==radioButtonList[1]: # Bundled or uploaded CSV
            sourceLabel, documents = "CSV file", csv_data
        elif genre==radioButtonList[2]: # Custom PDF Upload
            sourceLabel, documents = "PDF file", pdf_pages
        elif genre==radioButtonList[3]: # Google Alphabet URL Earnings Report
            sourceLabel, documents = "website", WebBaseLoader(defaultGoogleURL).load()
        elif genre==radioButtonList[4]: # Custom URL
            customURL = urlInput.strip()
            if customURL:
                sourceLabel, documents = "website", WebBaseLoader(customURL).load()
            else:
                # An empty URL cannot be fetched; ask for one instead of failing.
                st.warning("Please enter a URL before starting the chat.")

        if documents is not None:
            # One prompt template for every source. The blank lines and the
            # 12-space indentation reproduce the text the per-source prompts
            # sent before, so the model sees the same input.
            prompt = (
                "\n\n"
                f"            I have {sourceLabel} contents below:\n"
                "\n\n\n"
                f"            {str(documents)}\n"
                "\n\n\n"
                f"            {chatTextStr}\n"
                "\n"
                "            "
            )
            answer = llm.predict(prompt)
            st.write(answer)