# Spaces: Sleeping (page-status text captured along with the file; not part of the program)
import streamlit as st | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
from pandasai import SmartDataframe | |
from pandasai.llm import OpenAI | |
from pandasai.callbacks import BaseCallback | |
from pandasai.responses.response_parser import ResponseParser | |
import os | |
# Set OpenAI API Token
# The token is read from the environment only. A key must never be embedded in
# source as a fallback value: anything committed here is readable by everyone
# with access to the file, so the key that used to be hard-coded on this line
# should be treated as leaked and revoked.
OPENAI_API_TOKEN = os.getenv("BIT_OPENAI_API_KEY")
if not OPENAI_API_TOKEN:
    # Report the misconfiguration in the page and halt this script run, so the
    # client below is never constructed without a token.
    st.error(
        "OpenAI API key is not configured. "
        "Set the BIT_OPENAI_API_KEY environment variable and restart the app."
    )
    st.stop()

# Load OpenAI Mini model
llm = OpenAI(api_token=OPENAI_API_TOKEN, model_name="gpt-4o-mini")
class StreamlitCallback(BaseCallback):
    """Callback that displays code passed to ``on_code`` in a given container."""

    def __init__(self, container) -> None:
        """Store the container that later ``on_code`` calls render into."""
        self.container = container

    def on_code(self, response: str):
        """Render ``response`` through the stored container's ``code`` method."""
        show_code = self.container.code
        show_code(response)
class StreamlitResponse(ResponseParser):
    """Response parser that draws each result directly in the Streamlit page.

    Every ``format_*`` method reads ``result["value"]``, hands it to a
    Streamlit display call and returns ``None``.
    """

    def __init__(self, context) -> None:
        """Pass ``context`` unchanged to the ``ResponseParser`` initializer."""
        super().__init__(context)

    def format_dataframe(self, result):
        """Display ``result["value"]`` with ``st.dataframe``."""
        table = result["value"]
        st.dataframe(table)

    def format_plot(self, result):
        """Display ``result["value"]`` with ``st.image``."""
        picture = result["value"]
        st.image(picture)

    def format_other(self, result):
        """Display ``result["value"]`` with ``st.write``."""
        payload = result["value"]
        st.write(payload)
def _detect_csv_delimiter(first_line: str) -> str:
    """Return ``";"`` or ``","`` depending on which dominates ``first_line``."""
    semicolons = first_line.count(";")
    commas = first_line.count(",")
    # A single stray ";" inside a comma-separated header must not win; on a tie
    # the previous preference for ";" (when one is present) is kept.
    return ";" if semicolons and semicolons >= commas else ","


def process_file(file):
    """Load an uploaded CSV or Excel file and wrap it in a ``SmartDataframe``.

    Args:
        file: Uploaded file object. It must expose ``name`` and, for CSV
            input, ``readline()`` returning bytes and ``seek()``.

    Returns:
        A ``(df, sdf)`` tuple with the pandas DataFrame and the
        ``SmartDataframe`` built on it, or ``(None, None)`` when the file
        could not be loaded. In that case the reason has already been shown
        to the user with ``st.error``.
    """
    file_extension = file.name.split(".")[-1].lower()
    try:
        if file_extension == "csv":
            # Only the delimiter characters matter for detection, so bytes
            # that are not valid UTF-8 are replaced instead of raising here.
            first_line = file.readline().decode("utf-8", errors="replace")
            file.seek(0)  # Reset file pointer
            df = pd.read_csv(file, delimiter=_detect_csv_delimiter(first_line))
        elif file_extension in ("xls", "xlsx"):
            # Let pandas choose the reader from the workbook format: forcing
            # openpyxl cannot open legacy .xls files, which are accepted here.
            df = pd.read_excel(file)
        else:
            st.error("Unsupported file format. Please upload a CSV or Excel file.")
            return None, None
    except ImportError as e:
        # pandas names the missing optional reader package in its own message.
        st.error(f"Error: {e}. Please install the missing Excel reader package.")
        return None, None
    except Exception as e:
        # UI boundary: empty, malformed, corrupt or wrongly encoded uploads
        # raise a variety of reader-specific errors; report them to the user
        # instead of crashing the page with a traceback.
        st.error(f"Could not read the uploaded file: {e}")
        return None, None

    sdf = SmartDataframe(df, config={
        "llm": llm,
        "save_logs": True,
        "verbose": False,
        "response_parser": StreamlitResponse,
    })
    return df, sdf
def main():
    """Render the app: file upload, data preview and the free-text query box.

    Nothing below the uploader is shown until a file has been uploaded and
    loaded successfully; the query is sent to the ``SmartDataframe`` only when
    the text area contains non-blank text.
    """
    st.title("AI-Powered Dataframe Analysis with OpenAI")
    uploaded_file = st.file_uploader("Upload CSV or Excel File", type=["csv", "xls", "xlsx"])
    if uploaded_file is None:
        return

    df, sdf = process_file(uploaded_file)
    if df is None:
        # process_file has already reported the problem via st.error.
        return

    # The heading previously began with a stray "π", most likely a mangled
    # emoji; it is dropped rather than guessed.
    st.subheader("Data Preview")
    with st.expander("View Data Summary"):
        st.write("### Data Overview")
        st.dataframe(df.describe(include="all"))
        st.write("### First 10 Rows of Data")
        st.dataframe(df.head(10))
        st.write("### Column Details")
        for col in df.columns:
            st.write(f"**{col}**")
            st.dataframe(df[[col]].head(10))

    query = st.text_area("Enter Your Query")
    if not query or not query.strip():
        return

    try:
        answer = sdf.chat(query)
    except Exception as e:
        # UI boundary: surface LLM/network failures instead of a traceback.
        st.error(f"The query could not be answered: {e}")
        return

    # StreamlitResponse renders results itself and returns None. Anything else
    # coming back (presumably an error message from the library -- confirm
    # against the installed pandasai version) used to be dropped silently.
    if answer is not None:
        st.write(answer)
# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()