pdfchat / app.py
markytools's picture
working code using open server api
bb0cbef
raw
history blame
6.88 kB
import streamlit as st
from tempfile import NamedTemporaryFile
import pprint
import os
from dotenv import load_dotenv, find_dotenv
import os
from langchain_openai import ChatOpenAI
from langchain_community.document_loaders import PyPDFLoader
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.document_loaders import WebBaseLoader
import pandas as pd
import numpy as np
import pprint
# Search URL loaded for the "URL Chat with Google's Latest Earnings" option;
# also shown as the example in the custom-URL input placeholder.
defaultGoogleURL = "https://www.google.com/search?q=google+earnings"
# API key read from Streamlit secrets; passed to ChatOpenAI as `openai_api_key`.
OPEN_ROUTER_KEY = st.secrets["OPEN_ROUTER_KEY"]
# Model identifier passed to ChatOpenAI as `model`.
OPEN_ROUTER_MODEL = "meta-llama/llama-3.1-70b-instruct:free"
def pretty_print_columns(text: str) -> str:
    """Collapse a multi-line column description into a single line.

    Each line of ``text`` is stripped of leading/trailing whitespace, blank
    lines are dropped, and the remaining lines are joined with one space.
    Whitespace inside a line is left untouched.

    Args:
        text: The input string containing the column descriptions.

    Returns:
        The non-blank, stripped lines of ``text`` joined by single spaces
        (an empty string when ``text`` has no non-blank lines).
    """
    # Strip every line once, then keep only the ones that are non-empty.
    stripped_lines = (line.strip() for line in text.splitlines())
    return " ".join(line for line in stripped_lines if line)
# Password gate: compares the `pwd` query-string parameter with the `PSWD`
# Streamlit secret.
# NOTE(review): the flag starts out True, so this check can never reject a
# visitor and the "Invalid Password" branch that follows is unreachable.
# Left unchanged because it may be a deliberate bypass; initialise the flag
# to False to actually enforce the password.
isPswdValid = True
try:
    pswdVal = st.experimental_get_query_params()['pwd'][0]
    if pswdVal == st.secrets["PSWD"]:
        isPswdValid = True
except Exception:
    # Best effort: a missing `pwd` parameter or `PSWD` secret leaves the flag
    # untouched. `Exception` (rather than a bare `except:`) lets
    # KeyboardInterrupt/SystemExit and other BaseException signals propagate.
    pass
# Main page: pick a data source (bundled CSV, uploaded CSV/PDF, or a URL),
# load its contents, and send them together with the user's question to the
# chat model.
if not isPswdValid:
    st.write("Invalid Password")
else:
    radioButtonList = [
        "E-commerce CSV (https://www.kaggle.com/datasets/mervemenekse/ecommerce-dataset)",
        "Upload my own CSV",
        "Upload my own PDF",
        f"URL Chat with Google's Latest Earnings ({defaultGoogleURL})",
        "Enter my own URL",
    ]
    # Add some designs to the radio buttons
    st.markdown("""
<style>
.stRadio {
padding: 10px;
border-radius: 5px;
background-color: #f5f5f5;
}
.stRadio input[type="radio"] {
position: absolute;
opacity: 0;
cursor: pointer;
}
.stRadio label {
display: flex;
justify-content: center;
align-items: center;
cursor: pointer;
font-size: 16px;
color: #333;
}
.stRadio label:hover {
color: #000;
}
.stRadio.st-selected input[type="radio"] ~ label {
color: #000;
background-color: #d9d9d9;
}
</style>
""", unsafe_allow_html=True)
    genre = st.radio(
        "Tired of reading your files? Chat with it using AI! Choose dataset to finetune",
        radioButtonList,
        index=0,
    )

    # Initialize language model
    load_dotenv(find_dotenv())  # read local .env file
    llm = ChatOpenAI(
        model=OPEN_ROUTER_MODEL,
        temperature=0.1,
        openai_api_key=OPEN_ROUTER_KEY,
        openai_api_base="https://openrouter.ai/api/v1",
    )

    # Per radio option, in the same order as radioButtonList:
    # (source kind shown in the UI, example question for the placeholder).
    optionSettings = [
        ("CSV", "Question1: What was the most sold item? Question2: What was the most common payment?"),
        ("CSV", "What are the data columns?"),
        ("PDF", "Can you summarize the contents?"),
        ("URL", "What is Google's latest earnings?"),
        ("URL", "Can you summarize the contents?"),
    ]
    genreIndex = radioButtonList.index(genre)
    pdfCSVURLText, exampleQuestion = optionSettings[genreIndex]

    isSampleCSV = genreIndex == 0
    isCustomCSVUpload = genreIndex == 1
    isCustomPDFUpload = genreIndex == 2
    isGoogleURL = genreIndex == 3
    isCustomURL = genreIndex == 4

    # Loaded documents; stay None until the matching source has been read so
    # the chat controls can be kept disabled instead of hitting a NameError.
    csv_data = None
    pdf_pages = None

    if isSampleCSV:
        loader = CSVLoader(file_path='EcommerceDataset.csv')
        csv_data = loader.load()

    urlInput = st.text_input(
        'Enter your own URL',
        '',
        placeholder=f"Type your URL here (e.g. {defaultGoogleURL})",
        disabled=not isCustomURL,
    )
    isCustomPDF = isCustomCSVUpload or isCustomPDFUpload
    uploaded_file = st.file_uploader(
        f"Upload your own {pdfCSVURLText} here",
        type=pdfCSVURLText.lower(),
        disabled=not isCustomPDF,
    )

    uploadedFilename = ""
    # Only materialise the upload when an upload option is actually selected.
    if uploaded_file is not None and isCustomPDF:
        with NamedTemporaryFile(dir='.', suffix=f'.{pdfCSVURLText.lower()}') as f:
            f.write(uploaded_file.getbuffer())
            # The loaders reopen the file by name, so buffered bytes must be
            # on disk before they read it.
            f.flush()
            uploadedFilename = f.name
            # Load while the temporary file still exists (it is deleted when
            # the `with` block exits).
            if isCustomCSVUpload:
                loader = CSVLoader(file_path=uploadedFilename)
                csv_data = loader.load()
            else:
                loader = PyPDFLoader(uploadedFilename)
                pdf_pages = loader.load_and_split()

    # The chat controls are only usable once the selected source is available.
    if isCustomCSVUpload:
        enableChatBox = csv_data is not None
    elif isCustomPDFUpload:
        enableChatBox = pdf_pages is not None
    elif isCustomURL:
        enableChatBox = bool(urlInput.strip())
    else:  # bundled CSV or the default Google URL
        enableChatBox = True

    chatTextStr = st.text_input(
        f'Ask me anything about this {pdfCSVURLText}',
        '',
        placeholder=f"Type here (e.g. {exampleQuestion})",
        disabled=not enableChatBox,
    )
    chatWithPDFButton = "CLICK HERE TO START CHATTING"
    if st.button(chatWithPDFButton, disabled=not enableChatBox):  # Button clicked
        if not chatTextStr.strip():
            # Nothing to ask: avoid sending the whole document with no question.
            st.warning("Please type a question first.")
        else:
            if isSampleCSV or isCustomCSVUpload:
                sourceLabel, contents = "CSV file", csv_data
            elif isCustomPDFUpload:
                sourceLabel, contents = "PDF file", pdf_pages
            else:
                # URL options: fetch the page only when the user asks.
                targetURL = defaultGoogleURL if isGoogleURL else urlInput
                loader = WebBaseLoader(targetURL)
                sourceLabel, contents = "website", loader.load()
            # Same prompt layout for every source: header line, the loaded
            # documents, then the user's question.
            prompt = (
                f"\nI have {sourceLabel} contents below:\n"
                f"{str(contents)}\n"
                f"{chatTextStr}\n"
            )
            answer = llm.predict(prompt)
            st.write(answer)