import streamlit as st
import pandas as pd
import os
from pandasai import SmartDataframe
from pandasai.llm import OpenAI
import tempfile
import matplotlib.pyplot as plt
from datasets import load_dataset
from langchain_groq import ChatGroq
from langchain_openai import ChatOpenAI
import time
openai_api_key = os.getenv("OPENAI_API_KEY")
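# NOTE: the code below references `llm`, which this file never defines.
# A minimal sketch of one way to initialize it, assuming the pandasai OpenAI
# wrapper imported above and the OPENAI_API_KEY environment variable:
llm = OpenAI(api_token=openai_api_key) if openai_api_key else None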
# Dataset loading without caching to support progress bar
def load_huggingface_dataset(dataset_name):
    # Initialize progress bar
    progress_bar = st.progress(0)
    try:
        # Incrementally update progress
        progress_bar.progress(10)
        # NOTE: name="sample" and uniform_split=True are loader kwargs for the
        # default HUPD/hupd dataset; other dataset names may not accept them.
        dataset = load_dataset(dataset_name, name="sample", split="train", trust_remote_code=True, uniform_split=True)
        progress_bar.progress(50)
        if hasattr(dataset, "to_pandas"):
            df = dataset.to_pandas()
        else:
            df = pd.DataFrame(dataset)
        progress_bar.progress(100)  # Final update to 100%
        return df
    except Exception as e:
        progress_bar.progress(0)  # Reset progress bar on failure
        raise e
def load_uploaded_csv(uploaded_file):
    # Initialize progress bar
    progress_bar = st.progress(0)
    try:
        # Simulate progress
        progress_bar.progress(10)
        time.sleep(1)  # Simulate file processing delay
        progress_bar.progress(50)
        df = pd.read_csv(uploaded_file)
        progress_bar.progress(100)  # Final update
        return df
    except Exception as e:
        progress_bar.progress(0)  # Reset progress bar on failure
        raise e
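# `chat_with_csv` is called in the main app below but is not defined in this
# file; a minimal sketch, assuming PandasAI's SmartDataframe (imported above)
# configured with the `llm` initialized earlier:
def chat_with_csv(df, query):
    # Wrap the DataFrame so the LLM can answer natural-language questions about it
    smart_df = SmartDataframe(df, config={"llm": llm})
    return smart_df.chat(query)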
# Dataset selection logic
def load_dataset_into_session():
    input_option = st.radio(
        "Select Dataset Input:",
        ["Use Repo Directory Dataset", "Use Hugging Face Dataset", "Upload CSV File"],
        index=1,
        horizontal=True,
    )

    # Option 1: Load dataset from the repo directory
    if input_option == "Use Repo Directory Dataset":
        file_path = "./source/test.csv"
        if st.button("Load Dataset"):
            try:
                with st.spinner("Loading dataset from the repo directory..."):
                    st.session_state.df = pd.read_csv(file_path)
                st.success(f"File loaded successfully from '{file_path}'!")
            except Exception as e:
                st.error(f"Error loading dataset from the repo directory: {e}")

    # Option 2: Load dataset from Hugging Face
    elif input_option == "Use Hugging Face Dataset":
        dataset_name = st.text_input(
            "Enter Hugging Face Dataset Name:", value="HUPD/hupd"
        )
        if st.button("Load Dataset"):
            try:
                st.session_state.df = load_huggingface_dataset(dataset_name)
                st.success(f"Hugging Face Dataset '{dataset_name}' loaded successfully!")
            except Exception as e:
                st.error(f"Error loading Hugging Face dataset: {e}")

    # Option 3: Upload CSV File
    elif input_option == "Upload CSV File":
        uploaded_file = st.file_uploader("Upload a CSV File:", type=["csv"])
        if uploaded_file:
            try:
                st.session_state.df = load_uploaded_csv(uploaded_file)
                st.success("File uploaded successfully!")
            except Exception as e:
                st.error(f"Error reading uploaded file: {e}")
# Streamlit app main: set_page_config must be the first Streamlit call in the script
st.set_page_config(layout="wide")
st.title("ChatCSV powered by LLM")
st.header("Load Your Dataset")

# Load dataset into session (called once; its widgets may only be created once per run)
load_dataset_into_session()
if "df" in st.session_state and llm:
df = st.session_state.df
# Display dataset metadata
st.write("### Dataset Metadata")
st.text(f"Number of Rows: {df.shape[0]}")
st.text(f"Number of Columns: {df.shape[1]}")
st.text(f"Column Names: {', '.join(df.columns)}")
# Display dataset preview
st.write("### Dataset Preview")
num_rows = st.slider("Select number of rows to display:", min_value=5, max_value=50, value=10)
st.dataframe(df.head(num_rows))
# Chat with the loaded dataset
if "df" in st.session_state and not st.session_state.df.empty:
    st.subheader("Dataset Preview")
    st.dataframe(st.session_state.df, use_container_width=True)

    st.subheader("Chat with Your Dataset")
    user_query = st.text_area("Enter your query:")

    if st.button("Run Query"):
        if user_query.strip():
            with st.spinner("Processing your query..."):
                try:
                    result = chat_with_csv(st.session_state.df, user_query)
                    st.success(result)
                except Exception as e:
                    st.error(f"Error processing your query: {e}")
        else:
            st.warning("Please enter a query before running.")