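# File: pandasai / app3.py (Hugging Face Spaces file viewer export, 5.71 kB)
# Last commit: "Update app3.py" (f121a60, verified), author avatar: DrishtiSharma
# The remaining header items ("raw", "history blame") were page navigation links.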
import streamlit as st
import pandas as pd
import plotly.express as px
from pandasai import Agent
from pandasai.llm.openai import OpenAI
from langchain_community.embeddings.openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.schema import Document
import os
import logging
# Configure logging
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)


def load_dataframe(uploaded_file):
    """Read an uploaded ``.xlsx`` or ``.csv`` file into a DataFrame.

    The extension check is case-insensitive so a file named ``REPORT.XLSX``
    is not mistakenly handed to the CSV parser.
    """
    if uploaded_file.name.lower().endswith(".xlsx"):
        return pd.read_excel(uploaded_file)
    return pd.read_csv(uploaded_file)


def row_to_text(row):
    """Render one DataFrame row as ``"col: value, col: value"``.

    Null cells are skipped so the text embedded for retrieval only contains
    information that is actually present in the row.
    """
    return ", ".join(
        f"{col}: {value}" for col, value in row.items() if pd.notnull(value)
    )


def has_content(result):
    """Return True when a PandasAI answer is worth displaying.

    A plain truthiness test is not usable here: it raises ``ValueError`` for
    a DataFrame/Series answer and wrongly discards a numeric answer of ``0``.
    """
    if result is None:
        return False
    if isinstance(result, (pd.DataFrame, pd.Series)):
        return not result.empty
    if isinstance(result, (str, bytes, list, tuple, dict, set)):
        return len(result) > 0
    return True


def extract_python_code(text):
    """Return the body of the first ```python fenced block in *text*.

    Returns ``None`` when *text* is not a string (the agent may answer with a
    DataFrame or a number) or when it contains no fenced Python block.
    """
    import re

    if not isinstance(text, str):
        return None
    match = re.search(r'```python\n(.*?)\n```', text, re.DOTALL)
    return match.group(1) if match else None


def run_visualization_code(code, df):
    """Execute generated plotting code and return its ``fig`` (or ``None``).

    SECURITY: ``code`` is LLM output and ``exec`` runs it with the full
    privileges of this process. It is executed in a dedicated namespace so it
    cannot silently overwrite the app's own variables, but that is NOT a
    sandbox. Only deploy this where running model-generated code is acceptable.
    """
    # Same matplotlib -> plotly prefix rewrite the app has always applied.
    # A former second replace of 'plt.show()' could never match after this
    # one (and would have injected undefined x/y), so it is dropped.
    plotly_code = code.replace('plt.', 'px.')
    namespace = {"df": df, "pd": pd, "px": px}
    exec(plotly_code, namespace)
    return namespace.get("fig")


def build_qa_chain(df):
    """Embed every row of *df* and return a RetrievalQA chain over them."""
    # Convert the DataFrame into documents for RAG
    documents = [
        Document(page_content=row_to_text(row), metadata={"index": index})
        for index, row in df.iterrows()
    ]
    logger.info(f"{len(documents)} documents created for RAG.")

    # Set up RAG
    embeddings = OpenAIEmbeddings()
    vectorstore = FAISS.from_documents(documents, embeddings)
    return RetrievalQA.from_chain_type(
        llm=ChatOpenAI(),
        chain_type="stuff",
        retriever=vectorstore.as_retriever(),
    )


def get_qa_chain(df, uploaded_file):
    """Return the RAG chain for *uploaded_file*, building it at most once.

    Streamlit re-executes the whole script on every widget interaction.
    Without this cache each rerun would re-embed every row of the file.
    """
    cache_key = (
        getattr(uploaded_file, "file_id", None),  # absent on older Streamlit
        uploaded_file.name,
        uploaded_file.size,
    )
    if st.session_state.get("qa_chain_key") != cache_key:
        # Store the chain before the key so a failed build is retried.
        st.session_state["qa_chain"] = build_qa_chain(df)
        st.session_state["qa_chain_key"] = cache_key
    return st.session_state["qa_chain"]


def render_pandasai_tab(agent):
    """Tab 1: free-form questions answered by the PandasAI agent."""
    st.header("Data Analysis using PandasAI")
    pandas_question = st.text_input("Ask a question about the data (PandasAI):")
    if not pandas_question:
        return
    try:
        result = agent.chat(pandas_question)
        if has_content(result):
            st.write("PandasAI Answer:", result)
        else:
            st.warning("PandasAI returned no result. Please try another question.")
    except Exception as e:
        st.error(f"Error from PandasAI: {e}")
        logger.exception("PandasAI error: %s", e)


def render_rag_tab(qa_chain):
    """Tab 2: retrieval-augmented question answering over the rows."""
    st.header("Question Answering using RAG")
    rag_question = st.text_input("Ask a question about the data (RAG):")
    if not rag_question:
        return
    try:
        # invoke() replaces the deprecated Chain.run(); RetrievalQA takes the
        # question under "query" and returns the answer under "result".
        result = qa_chain.invoke({"query": rag_question})["result"]
        st.write("RAG Answer:", result)
    except Exception as e:
        st.error(f"Error from RAG Q&A: {e}")
        logger.exception("RAG error: %s", e)


def render_visualization_tab(agent, df):
    """Tab 3: ask the agent for a chart and display it."""
    st.header("Data Visualization")
    viz_question = st.text_input("What kind of graph would you like to create? (e.g., 'Show a scatter plot of salary vs experience')")
    if not viz_question:
        return
    try:
        result = agent.chat(viz_question)
        viz_code = extract_python_code(result)
        if viz_code is None:
            # NOTE(review): PandasAI appears to answer chart requests with the
            # path of a saved image rather than code; show it if so. Confirm
            # against the installed PandasAI version.
            if isinstance(result, str) and os.path.isfile(result):
                st.image(result)
            else:
                st.warning("Unable to generate a graph. Please try a different query.")
                logger.warning("No valid visualization code found in PandasAI response.")
            return

        logger.debug(f"Extracted visualization code: {viz_code}")
        # Execute the code and display the chart
        fig = run_visualization_code(viz_code, df)
        if fig is None:
            st.warning("Unable to generate a graph. Please try a different query.")
            logger.warning("Visualization code did not define a 'fig' variable.")
        else:
            st.plotly_chart(fig)
    except Exception as e:
        st.error(f"An error occurred: {e}")
        logger.exception("Visualization error: %s", e)


def main():
    """Render the app: check API keys, load the upload, show the three tabs."""
    # Set the title of the app
    st.title("Data Analyzer on Hugging Face Spaces")

    # Fetch API keys from environment variables
    api_key = os.getenv("OPENAI_API_KEY")
    pandasai_api_key = os.getenv("PANDASAI_API_KEY")
    if not api_key or not pandasai_api_key:
        st.error(
            "API keys not found in the environment. Please set the 'OPENAI_API_KEY' and 'PANDASAI_API_KEY' environment variables."
        )
        logger.error("API keys not found. Ensure they are set in the environment variables.")
        return

    # File uploader
    uploaded_file = st.file_uploader("Upload an Excel or CSV file", type=["xlsx", "csv"])
    if uploaded_file is None:
        st.info("Please upload a file to begin analysis.")
        return

    try:
        # Load the data
        df = load_dataframe(uploaded_file)
        st.write("Data Preview:")
        st.write(df.head())
        logger.info(f"Uploaded file loaded successfully with shape: {df.shape}")
        if df.empty:
            # An empty frame would otherwise fail inside the FAISS index
            # build with an opaque error message.
            st.warning("The uploaded file contains no rows to analyze.")
            return

        # Initialize PandasAI Agent.
        # The OpenAI wrapper needs the OpenAI key, passed as `api_token`.
        # PANDASAI_API_KEY is only checked for presence above; presumably
        # PandasAI reads it from the environment itself (TODO confirm).
        # NOTE(review): verify `timeout` is honoured by the installed PandasAI
        # version; it may expect `request_timeout` instead.
        llm = OpenAI(api_token=api_key, max_tokens=1500, timeout=60)
        agent = Agent(df, llm=llm)

        qa_chain = get_qa_chain(df, uploaded_file)

        # Create tabs
        tab1, tab2, tab3 = st.tabs(["PandasAI Analysis", "RAG Q&A", "Data Visualization"])
        with tab1:
            render_pandasai_tab(agent)
        with tab2:
            render_rag_tab(qa_chain)
        with tab3:
            render_visualization_tab(agent, df)
    except Exception as e:
        st.error(f"An error occurred while processing the file: {e}")
        logger.exception("File processing error: %s", e)


# Streamlit executes the script with __name__ == "__main__", so the app still
# renders under `streamlit run`, while the helpers above stay importable.
if __name__ == "__main__":
    main()