File size: 3,294 Bytes
23599de
8ec17cc
 
23599de
 
8ec17cc
23599de
8ec17cc
23599de
8ec17cc
 
 
 
 
23599de
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8ec17cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23599de
8ec17cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
from pandasai import SmartDataframe
from pandasai.llm import OpenAI
from pandasai.callbacks import BaseCallback
from pandasai.responses.response_parser import ResponseParser
import os

# Read the OpenAI API token from the environment only. A real key must never
# be committed as a fallback default: anyone who can read this file could use
# it. Any key that was previously published here should be revoked and rotated.
OPENAI_API_TOKEN = os.getenv("BIT_OPENAI_API_KEY")
if not OPENAI_API_TOKEN:
    # Fail early with an actionable message instead of building a client
    # without credentials.
    st.error(
        "OpenAI API key not found. Set the BIT_OPENAI_API_KEY environment "
        "variable and restart the app."
    )
    st.stop()

# Load OpenAI Mini model
llm = OpenAI(api_token=OPENAI_API_TOKEN, model_name="gpt-4o-mini")

class StreamlitCallback(BaseCallback):
    """Callback that shows the code it receives inside a given container.

    The container only needs a ``code`` method (presumably a Streamlit
    container -- confirm against the caller).
    """

    def __init__(self, container) -> None:
        """Remember the container that ``on_code`` writes to."""
        self.container = container

    def on_code(self, response: str):
        """Display ``response`` by passing it to the container's ``code`` method."""
        target = self.container
        target.code(response)

class StreamlitResponse(ResponseParser):
    """Response parser that renders each result's ``"value"`` with Streamlit.

    Every ``format_*`` method writes to the page and returns ``None``.
    """

    def __init__(self, context) -> None:
        """Pass ``context`` through to the ``ResponseParser`` initializer."""
        super().__init__(context)

    def format_dataframe(self, result):
        """Show ``result["value"]`` using ``st.dataframe``."""
        value = result["value"]
        st.dataframe(value)

    def format_plot(self, result):
        """Show ``result["value"]`` using ``st.image``."""
        value = result["value"]
        st.image(value)

    def format_other(self, result):
        """Show ``result["value"]`` using ``st.write``."""
        value = result["value"]
        st.write(value)

def process_file(file):
    """Load an uploaded CSV or Excel file and wrap it in a SmartDataframe.

    Args:
        file: Uploaded file object. The code reads its ``name`` attribute
            and, for CSV input, calls ``readline()`` (expected to return
            bytes) and ``seek()`` on it.

    Returns:
        ``(df, sdf)``: the pandas DataFrame and a ``SmartDataframe`` built
        from it. Returns ``(None, None)`` after showing an ``st.error``
        message when the extension is unsupported, a required Excel package
        is missing, or the file cannot be parsed.
    """
    file_extension = file.name.split(".")[-1].lower()
    # Legacy .xls workbooks cannot be read by openpyxl; they need xlrd, which
    # pandas selects on its own when no engine is forced.
    excel_dependency = "xlrd" if file_extension == "xls" else "openpyxl"
    try:
        if file_extension == "csv":
            # Sniff the separator from the header line. errors="replace"
            # keeps a stray non-UTF-8 byte from aborting the sniff itself.
            first_line = file.readline().decode("utf-8", errors="replace")
            file.seek(0)  # Reset file pointer
            semicolons = first_line.count(";")
            commas = first_line.count(",")
            # Pick ";" only when it is at least as frequent as ",", so a
            # single semicolon inside a comma-separated header does not win.
            delimiter = ";" if semicolons and semicolons >= commas else ","
            df = pd.read_csv(file, delimiter=delimiter)
        elif file_extension == "xlsx":
            import openpyxl  # noqa: F401  Ensures openpyxl is available
            df = pd.read_excel(file, engine="openpyxl")
        elif file_extension == "xls":
            df = pd.read_excel(file)
        else:
            st.error("Unsupported file format. Please upload a CSV or Excel file.")
            return None, None
    except ImportError as e:
        st.error(f"Error: {e}. Please install '{excel_dependency}' for Excel support.")
        return None, None
    except ValueError as e:
        # pandas ParserError / EmptyDataError and UnicodeDecodeError are all
        # ValueError subclasses; report them instead of crashing the app.
        st.error(f"Could not read the uploaded file: {e}")
        return None, None

    sdf = SmartDataframe(df, config={
        "llm": llm,
        "save_logs": True,
        "verbose": False,
        "response_parser": StreamlitResponse,
    })
    return df, sdf

def main():
    """Render the app: upload a file, preview its data, and run a query on it.

    Nothing is returned; all output is written to the Streamlit page.
    """
    st.title("AI-Powered Dataframe Analysis with OpenAI")
    uploaded_file = st.file_uploader("Upload CSV or Excel File", type=["csv", "xls", "xlsx"])

    if not uploaded_file:
        return

    df, sdf = process_file(uploaded_file)
    if df is None:
        # process_file has already shown an error message for this case.
        return

    st.subheader("🔎 Data Preview")

    with st.expander("View Data Summary"):
        st.write("### Data Overview")
        st.dataframe(df.describe(include='all'))

        st.write("### First 10 Rows of Data")
        st.dataframe(df.head(10))

        st.write("### Column Details")
        for col in df.columns:
            st.write(f"**{col}**")
            st.dataframe(df[[col]].head(10))

    query = st.text_area("Enter Your Query")

    # Ignore empty or whitespace-only input so no blank prompt is sent.
    if query and query.strip():
        # The answer is rendered by the response parser configured in
        # process_file, so the return value of chat() is not used here.
        with st.spinner("Analyzing..."):
            sdf.chat(query)


if __name__ == "__main__":
    main()