"""ChatCSV: a Streamlit app for loading a tabular dataset and querying it via PandasAI.

A dataset can come from a CSV file in the repo directory, a Hugging Face
dataset, or an uploaded CSV. The loaded frame is kept in
``st.session_state.df`` so it survives Streamlit reruns.
"""

import os
import tempfile
import time

import matplotlib.pyplot as plt
import pandas as pd
import streamlit as st
from datasets import load_dataset
from pandasai import SmartDataframe
from pandasai.llm import OpenAI

# Set Streamlit page config FIRST (it must precede any other Streamlit call).
st.set_page_config(layout='wide')

# Set API key
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    # Without a key the LLM client cannot be used; show a readable message
    # and halt this run instead of surfacing a traceback to the user.
    st.error(
        "The OPENAI_API_KEY environment variable is not set. "
        "Set it and restart the app."
    )
    st.stop()

# Define the LLM
llm = OpenAI(api_token=openai_api_key)

# Extra ``load_dataset`` keyword arguments that only make sense for specific
# datasets. Passing them to any other dataset would make loading fail, so
# they are applied by exact dataset name.
_DATASET_LOAD_OVERRIDES = {
    "HUPD/hupd": {"name": "sample", "uniform_split": True},
}

# Session-state key remembering which uploaded file is currently loaded.
_UPLOAD_SIGNATURE_KEY = "uploaded_csv_signature"

# File suffixes treated as chart images when a query result is a path string.
_IMAGE_SUFFIXES = (".png", ".jpg", ".jpeg")


# Chat with CSV
def chat_with_csv(df, prompt):
    """Ask the LLM a natural-language question about ``df``.

    Args:
        df: The pandas DataFrame to query.
        prompt: The user's question.

    Returns:
        Whatever ``SmartDataframe.chat`` returns for the prompt.
    """
    pandas_ai = SmartDataframe(df, config={"llm": llm})
    return pandas_ai.chat(prompt)


# Dataset loading without caching to support progress bar
def load_huggingface_dataset(dataset_name):
    """Load the ``train`` split of a Hugging Face dataset as a DataFrame.

    Args:
        dataset_name: Repository id of the dataset (e.g. ``"HUPD/hupd"``).

    Returns:
        A pandas DataFrame with the contents of the split.

    Raises:
        ValueError: If ``dataset_name`` is empty or blank.
        Exception: Any error raised while loading is re-raised after the
            progress bar has been reset to 0.
    """
    dataset_name = (dataset_name or "").strip()
    if not dataset_name:
        raise ValueError("Dataset name must not be empty.")

    progress_bar = st.progress(0)
    try:
        progress_bar.progress(10)
        overrides = _DATASET_LOAD_OVERRIDES.get(dataset_name, {})
        # SECURITY NOTE: trust_remote_code=True lets the dataset repository
        # run its own loading script on this machine, and the repository
        # name comes from user input. Only deploy where that is acceptable.
        dataset = load_dataset(
            dataset_name,
            split="train",
            trust_remote_code=True,
            **overrides,
        )
        progress_bar.progress(50)
        if hasattr(dataset, "to_pandas"):
            df = dataset.to_pandas()
        else:
            df = pd.DataFrame(dataset)
        progress_bar.progress(100)
        return df
    except Exception:
        progress_bar.progress(0)
        raise


# Load CSV file
def load_uploaded_csv(uploaded_file):
    """Parse an uploaded CSV file into a DataFrame, showing a progress bar.

    Args:
        uploaded_file: File-like object accepted by ``pd.read_csv``.

    Returns:
        The parsed pandas DataFrame.

    Raises:
        Exception: Any parsing error is re-raised after the progress bar
            has been reset to 0.
    """
    progress_bar = st.progress(0)
    try:
        progress_bar.progress(10)
        # Cosmetic pause so the progress bar is visible for small files.
        time.sleep(1)
        progress_bar.progress(50)
        # Rewind first: a stream that was already read once would otherwise
        # be parsed from its end and look like an empty file.
        if hasattr(uploaded_file, "seek"):
            uploaded_file.seek(0)
        df = pd.read_csv(uploaded_file)
        progress_bar.progress(100)
        return df
    except Exception:
        progress_bar.progress(0)
        raise


# Dataset selection logic
def load_dataset_into_session():
    """Render the dataset-source picker and store the chosen data in session state.

    On success the DataFrame is written to ``st.session_state.df``; on
    failure an error message is shown and the previous value is left alone.
    """
    input_option = st.radio(
        "Select Dataset Input:",
        ["Use Repo Directory Dataset", "Use Hugging Face Dataset", "Upload CSV File"],
        index=1,
        horizontal=True,
    )

    if input_option != "Upload CSV File":
        # Leaving the upload tab: forget which file was loaded so that
        # uploading the same file again later is processed again.
        st.session_state.pop(_UPLOAD_SIGNATURE_KEY, None)

    if input_option == "Use Repo Directory Dataset":
        file_path = "./source/test.csv"
        if st.button("Load Dataset"):
            try:
                with st.spinner("Loading dataset from the repo directory..."):
                    st.session_state.df = pd.read_csv(file_path)
                st.success(f"File loaded successfully from '{file_path}'!")
            except Exception as e:
                st.error(f"Error loading dataset from the repo directory: {e}")

    elif input_option == "Use Hugging Face Dataset":
        dataset_name = st.text_input("Enter Hugging Face Dataset Name:", value="HUPD/hupd")
        if st.button("Load Dataset"):
            try:
                st.session_state.df = load_huggingface_dataset(dataset_name)
                st.success(f"Hugging Face Dataset '{dataset_name}' loaded successfully!")
            except Exception as e:
                st.error(f"Error loading Hugging Face dataset: {e}")

    elif input_option == "Upload CSV File":
        uploaded_file = st.file_uploader("Upload a CSV File:", type=["csv"])
        if uploaded_file:
            # The uploader keeps returning the file on every rerun (each
            # slider move or keystroke). Parse it only when it is a file we
            # have not loaded yet, identified by its name and size.
            signature = (uploaded_file.name, uploaded_file.size)
            if st.session_state.get(_UPLOAD_SIGNATURE_KEY) != signature:
                try:
                    st.session_state.df = load_uploaded_csv(uploaded_file)
                    st.session_state[_UPLOAD_SIGNATURE_KEY] = signature
                    st.success("File uploaded successfully!")
                except Exception as e:
                    st.error(f"Error reading uploaded file: {e}")
        else:
            # File removed from the uploader: allow it to be loaded again.
            st.session_state.pop(_UPLOAD_SIGNATURE_KEY, None)


def _render_result(result):
    """Display a query result with the Streamlit element that fits its type."""
    if isinstance(result, pd.DataFrame):
        st.dataframe(result)
    elif (
        isinstance(result, str)
        and result.lower().endswith(_IMAGE_SUFFIXES)
        and os.path.isfile(result)
    ):
        # A string naming an existing image file is shown as the image
        # itself rather than as a bare path.
        st.image(result)
    else:
        st.success(result)


# Streamlit app main
st.title("ChatCSV")

# Ensure session state for dataframe
if "df" not in st.session_state:
    st.session_state.df = pd.DataFrame()

# Ensure session state for user query
if "user_query" not in st.session_state:
    st.session_state.user_query = ""

st.header("Load Your Dataset")
load_dataset_into_session()

if "df" in st.session_state and not st.session_state.df.empty:
    st.subheader("Dataset Preview")
    num_rows = st.slider("Select number of rows to display:", min_value=5, max_value=50, value=10)
    st.dataframe(st.session_state.df.head(num_rows))

    st.subheader("Chat with Your Dataset")

    # Text area for user query with session state persistence
    st.session_state.user_query = st.text_area(
        "Enter your query:", value=st.session_state.user_query
    )

    if st.button("Run Query"):
        if st.session_state.user_query.strip():
            with st.spinner("Processing your query..."):
                try:
                    result = chat_with_csv(st.session_state.df, st.session_state.user_query)
                    _render_result(result)
                except Exception as e:
                    st.error(f"Error processing your query: {e}")
        else:
            st.warning("Please enter a query before running.")