Spaces:

Arxived
/

chat-w-dataset

Sleeping

App Files Files Community

chat-w-dataset / app.py

DrishtiSharma

Update app.py

1c6d353 verified 6 months ago

raw

history blame

3.94 kB

	import streamlit as st
	import pandas as pd
	import os
	from pandasai import SmartDataframe
	from pandasai.llm import OpenAI
	import tempfile
	import matplotlib.pyplot as plt
	from datasets import load_dataset
	from langchain_groq import ChatGroq
	from langchain_openai import ChatOpenAI
	import time


	# OpenAI API key taken from the OPENAI_API_KEY environment variable
	# (None when the variable is not set); consumed by chat_with_csv.
	openai_api_key = os.getenv("OPENAI_API_KEY")

	def chat_with_csv(df, prompt):
	    """Answer a natural-language question about ``df`` via PandasAI.

	    Args:
	        df: The pandas DataFrame to query.
	        prompt: The user's question in plain language.

	    Returns:
	        Whatever ``SmartDataframe.chat`` produces for the prompt.
	    """
	    llm = OpenAI(api_token=openai_api_key)
	    # Use the SmartDataframe wrapper that this file imports; the legacy
	    # ``PandasAI`` class is not imported here, so referencing it would
	    # raise NameError on every call.
	    smart_df = SmartDataframe(df, config={"llm": llm})
	    return smart_df.chat(prompt)

	def load_huggingface_dataset(dataset_name):
	    """Load a Hugging Face dataset and return it as a pandas DataFrame.

	    A Streamlit progress bar is advanced through 10 / 50 / 100 as the load
	    proceeds and reset to 0 if anything fails; the exception is re-raised
	    to the caller.

	    NOTE(review): ``name="sample"`` and ``uniform_split=True`` look specific
	    to the default "HUPD/hupd" dataset -- confirm other datasets accept them.

	    Args:
	        dataset_name: Identifier passed to ``load_dataset``.

	    Returns:
	        The "train" split as a DataFrame.
	    """
	    bar = st.progress(0)
	    try:
	        bar.progress(10)
	        hf_data = load_dataset(
	            dataset_name,
	            name="sample",
	            split="train",
	            trust_remote_code=True,
	            uniform_split=True,
	        )
	        bar.progress(50)
	        # Prefer the dataset's own converter; otherwise build the frame directly.
	        to_frame = hf_data.to_pandas if hasattr(hf_data, "to_pandas") else None
	        frame = to_frame() if to_frame is not None else pd.DataFrame(hf_data)
	        bar.progress(100)
	        return frame
	    except Exception as err:
	        bar.progress(0)
	        raise err

	def load_uploaded_csv(uploaded_file):
	    """Parse an uploaded CSV file into a pandas DataFrame.

	    A Streamlit progress bar is advanced through 10 / 50 / 100 around the
	    parse and reset to 0 on failure.

	    Args:
	        uploaded_file: File-like object accepted by ``pd.read_csv``.

	    Returns:
	        The parsed DataFrame.

	    Raises:
	        Exception: Any error from ``pd.read_csv`` is re-raised unchanged.
	    """
	    progress_bar = st.progress(0)
	    try:
	        progress_bar.progress(10)
	        # No artificial delay here: the caller invokes this whenever a file
	        # is present in the uploader, so a sleep would slow that path every time.
	        progress_bar.progress(50)
	        df = pd.read_csv(uploaded_file)
	        progress_bar.progress(100)
	        return df
	    except Exception:
	        progress_bar.progress(0)
	        # Bare raise keeps the original traceback intact.
	        raise

	def load_dataset_into_session():
	    """Render the dataset-source picker and load the chosen dataset.

	    Offers three sources (repo CSV, Hugging Face dataset, uploaded CSV).
	    On success the DataFrame is stored in ``st.session_state.df``; on
	    failure the error is shown with ``st.error`` and not re-raised.
	    """
	    source_choice = st.radio(
	        "Select Dataset Input:",
	        ["Use Repo Directory Dataset", "Use Hugging Face Dataset", "Upload CSV File"],
	        index=1,
	        horizontal=True
	    )

	    # --- CSV bundled with the repository ---
	    if source_choice == "Use Repo Directory Dataset":
	        file_path = "./source/test.csv"
	        if not st.button("Load Dataset"):
	            return
	        try:
	            with st.spinner("Loading dataset from the repo directory..."):
	                st.session_state.df = pd.read_csv(file_path)
	            st.success(f"File loaded successfully from '{file_path}'!")
	        except Exception as err:
	            st.error(f"Error loading dataset from the repo directory: {err}")
	        return

	    # --- Dataset fetched from the Hugging Face hub ---
	    if source_choice == "Use Hugging Face Dataset":
	        dataset_name = st.text_input("Enter Hugging Face Dataset Name:", value="HUPD/hupd")
	        if not st.button("Load Dataset"):
	            return
	        try:
	            st.session_state.df = load_huggingface_dataset(dataset_name)
	            st.success(f"Hugging Face Dataset '{dataset_name}' loaded successfully!")
	        except Exception as err:
	            st.error(f"Error loading Hugging Face dataset: {err}")
	        return

	    # --- CSV supplied by the user ---
	    if source_choice == "Upload CSV File":
	        csv_upload = st.file_uploader("Upload a CSV File:", type=["csv"])
	        if not csv_upload:
	            return
	        try:
	            st.session_state.df = load_uploaded_csv(csv_upload)
	            st.success("File uploaded successfully!")
	        except Exception as err:
	            st.error(f"Error reading uploaded file: {err}")

	# ---- Streamlit page: configuration and title ----
	st.set_page_config(layout='wide')
	st.title("ChatCSV powered by LLM")

	# Guarantee a DataFrame slot exists in the session before anything reads it.
	if "df" not in st.session_state:
	    st.session_state.df = pd.DataFrame()  # empty placeholder until a dataset is loaded

	st.header("Load Your Dataset")
	load_dataset_into_session()

	# Read the frame only after the loader has had a chance to replace it.
	active_df = st.session_state.df
	if not active_df.empty:
	    st.subheader("Dataset Preview")
	    st.dataframe(active_df, use_container_width=True)

	    st.subheader("Chat with Your Dataset")
	    question = st.text_area("Enter your query:")

	    if st.button("Run Query"):
	        if not question.strip():
	            # Blank or whitespace-only input: prompt the user instead of calling the LLM.
	            st.warning("Please enter a query before running.")
	        else:
	            with st.spinner("Processing your query..."):
	                try:
	                    answer = chat_with_csv(active_df, question)
	                    st.success(answer)
	                except Exception as err:
	                    st.error(f"Error processing your query: {err}")