# Spaces: Sleeping
import streamlit as st | |
from tempfile import NamedTemporaryFile | |
import pprint | |
import os | |
from dotenv import load_dotenv, find_dotenv | |
import os | |
from langchain_openai import ChatOpenAI | |
from langchain_community.document_loaders import PyPDFLoader | |
from langchain.document_loaders.csv_loader import CSVLoader | |
from langchain.document_loaders import WebBaseLoader | |
import pandas as pd | |
import numpy as np | |
import pprint | |
# URL loaded for the "Google's Latest Earnings" option; also shown as the
# example in the custom-URL placeholder.
defaultGoogleURL = "https://www.google.com/search?q=google+earnings"
# API key read from Streamlit secrets; passed to ChatOpenAI as openai_api_key.
OPEN_ROUTER_KEY = st.secrets["OPEN_ROUTER_KEY"]
# Model identifier passed to ChatOpenAI (requests go to the OpenRouter API base).
OPEN_ROUTER_MODEL = "meta-llama/llama-3.1-70b-instruct:free"
def pretty_print_columns(text):
    """
    Flatten a multi-line column description into a single line.

    Each line is stripped of surrounding whitespace, lines that are empty
    after stripping are discarded, and the remainder is joined with single
    spaces.

    Args:
        text (str): The input string containing the column descriptions.
    Returns:
        str: The non-blank lines of ``text``, stripped and space-separated.
    """
    stripped_lines = (raw.strip() for raw in text.splitlines())
    return " ".join(piece for piece in stripped_lines if piece)
# Optional password check driven by the `pwd` query parameter.
#
# NOTE(review): the flag starts out True and the check below can only set it
# to True again, so the "Invalid Password" branch further down is never taken.
# That open-by-default behaviour is preserved here on purpose; if the gate is
# meant to be enforced, initialise this to False instead.
isPswdValid = True
try:
    if hasattr(st, "query_params"):
        # Current Streamlit API; get_all() keeps the original "first value of
        # the parameter" semantics and [0] raises IndexError when it is absent.
        pswdVal = st.query_params.get_all("pwd")[0]
    else:
        # Older Streamlit releases only provide the experimental accessor.
        pswdVal = st.experimental_get_query_params()['pwd'][0]
    if pswdVal == st.secrets["PSWD"]:
        isPswdValid = True
except Exception:
    # Best effort: a missing `pwd` parameter or missing PSWD secret simply
    # leaves the flag unchanged. `Exception` (rather than a bare `except`)
    # lets KeyboardInterrupt/SystemExit and other BaseException subclasses
    # propagate instead of being swallowed here.
    pass
# Main page: pick a data source, load its contents, and send the contents plus
# the user's question to the chat model.
if not isPswdValid:
    st.write("Invalid Password")
else:
    # Data sources the user can choose from; `genre` is compared against these.
    radioButtonList = [
        "E-commerce CSV (https://www.kaggle.com/datasets/mervemenekse/ecommerce-dataset)",
        "Upload my own CSV",
        "Upload my own PDF",
        f"URL Chat with Google's Latest Earnings ({defaultGoogleURL})",
        "Enter my own URL",
    ]
    (ecommerceOption, csvUploadOption, pdfUploadOption,
     googleUrlOption, customUrlOption) = radioButtonList

    # Add some designs to the radio buttons.
    # The markup starts at column 0 inside the string so that Markdown treats
    # it as an HTML block rather than an indented code block.
    st.markdown("""
<style>
.stRadio {
    padding: 10px;
    border-radius: 5px;
    background-color: #f5f5f5;
}
.stRadio input[type="radio"] {
    position: absolute;
    opacity: 0;
    cursor: pointer;
}
.stRadio label {
    display: flex;
    justify-content: center;
    align-items: center;
    cursor: pointer;
    font-size: 16px;
    color: #333;
}
.stRadio label:hover {
    color: #000;
}
.stRadio.st-selected input[type="radio"] ~ label {
    color: #000;
    background-color: #d9d9d9;
}
</style>
""", unsafe_allow_html=True)

    genre = st.radio(
        "Tired of reading your files? Chat with it using AI! Choose dataset to finetune", radioButtonList, index=0
    )

    # Initialize language model
    load_dotenv(find_dotenv())  # read local .env file
    llm = ChatOpenAI(model=OPEN_ROUTER_MODEL, temperature=0.1, openai_api_key=OPEN_ROUTER_KEY, openai_api_base="https://openrouter.ai/api/v1")

    # Per-source settings: (source kind shown in labels, example question).
    sourceSettings = {
        ecommerceOption: ("CSV", "Question1: What was the most sold item? Question2: What was the most common payment?"),
        csvUploadOption: ("CSV", "What are the data columns?"),
        pdfUploadOption: ("PDF", "Can you summarize the contents?"),
        googleUrlOption: ("URL", "What is Google's latest earnings?"),
        customUrlOption: ("URL", "Can you summarize the contents?"),
    }
    pdfCSVURLText, exampleQuestion = sourceSettings[genre]

    # Documents handed to the model; stays None until a source has been loaded.
    sourceDocs = None
    if genre == ecommerceOption:
        sourceDocs = CSVLoader(file_path='EcommerceDataset.csv').load()

    isCustomURL = genre == customUrlOption
    urlInput = st.text_input('Enter your own URL', '', placeholder=f"Type your URL here (e.g. {defaultGoogleURL})", disabled=not isCustomURL)

    isCustomPDF = genre in (csvUploadOption, pdfUploadOption)
    uploaded_file = st.file_uploader(f"Upload your own {pdfCSVURLText} here", type=pdfCSVURLText.lower(), disabled=not isCustomPDF)
    uploadedFilename = ""
    # Only the two upload options read the file, so skip the temp copy otherwise.
    if uploaded_file is not None and isCustomPDF:
        with NamedTemporaryFile(dir='.', suffix=f'.{pdfCSVURLText.lower()}') as f:
            f.write(uploaded_file.getbuffer())
            # The loaders reopen the file by name, so the buffered bytes must
            # be on disk before they run. Loading happens inside the `with`
            # because the temp file is deleted when the block exits.
            f.flush()
            uploadedFilename = f.name
            if genre == csvUploadOption:  # Custom CSV Upload
                sourceDocs = CSVLoader(file_path=uploadedFilename).load()
            else:  # Custom PDF Upload
                sourceDocs = PyPDFLoader(uploadedFilename).load_and_split()

    # The chat controls are usable only when there is something to chat about.
    if isCustomPDF:
        enableChatBox = sourceDocs is not None  # an upload has been loaded
    elif isCustomURL:
        enableChatBox = bool(urlInput.strip())  # an empty URL cannot be fetched
    else:
        enableChatBox = True  # bundled CSV / default Google URL

    chatTextStr = st.text_input(f'Ask me anything about this {pdfCSVURLText}', '', placeholder=f"Type here (e.g. {exampleQuestion})", disabled=not enableChatBox)
    chatWithPDFButton = "CLICK HERE TO START CHATTING"
    # The button follows the chat box: it must never be clickable while no
    # source is available, regardless of any leftover question text.
    if st.button(chatWithPDFButton, disabled=not enableChatBox):  # Button clicked
        if not chatTextStr.strip():
            st.warning("Please type a question before starting the chat.")
        else:
            # Web pages are fetched only on demand, when the user asks.
            if genre == googleUrlOption:  # Google Alphabet URL Earnings Report
                sourceDocs = WebBaseLoader(defaultGoogleURL).load()
            elif genre == customUrlOption:  # Custom URL
                sourceDocs = WebBaseLoader(urlInput.strip()).load()

            contentLabel = {"CSV": "CSV file", "PDF": "PDF file", "URL": "website"}[pdfCSVURLText]
            prompt = (
                f"\nI have {contentLabel} contents below:\n"
                f"{sourceDocs}\n"
                f"{chatTextStr}\n"
            )
            # invoke() returns a message object; its text is in `.content`.
            answer = llm.invoke(prompt).content
            st.write(answer)