DrishtiSharma committed on
Commit
329fe9b
·
verified ·
1 Parent(s): 1c6d353

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -18
app.py CHANGED
@@ -13,15 +13,12 @@ import time
13
 
14
  openai_api_key = os.getenv("OPENAI_API_KEY")
15
 
16
- def chat_with_csv(df, prompt):
17
- llm = OpenAI(api_token=openai_api_key)
18
- pandas_ai = PandasAI(llm)
19
- result = pandas_ai.run(df, prompt=prompt)
20
- return result
21
-
22
  def load_huggingface_dataset(dataset_name):
 
23
  progress_bar = st.progress(0)
24
  try:
 
25
  progress_bar.progress(10)
26
  dataset = load_dataset(dataset_name, name="sample", split="train", trust_remote_code=True, uniform_split=True)
27
  progress_bar.progress(50)
@@ -29,33 +26,35 @@ def load_huggingface_dataset(dataset_name):
29
  df = dataset.to_pandas()
30
  else:
31
  df = pd.DataFrame(dataset)
32
- progress_bar.progress(100)
33
  return df
34
  except Exception as e:
35
- progress_bar.progress(0)
36
  raise e
37
 
38
  def load_uploaded_csv(uploaded_file):
 
39
  progress_bar = st.progress(0)
40
  try:
 
41
  progress_bar.progress(10)
42
- time.sleep(1)
43
  progress_bar.progress(50)
44
  df = pd.read_csv(uploaded_file)
45
- progress_bar.progress(100)
46
  return df
47
  except Exception as e:
48
- progress_bar.progress(0)
49
  raise e
50
 
 
51
  def load_dataset_into_session():
52
  input_option = st.radio(
53
  "Select Dataset Input:",
54
- ["Use Repo Directory Dataset", "Use Hugging Face Dataset", "Upload CSV File"],
55
- index=1,
56
- horizontal=True
57
  )
58
 
 
59
  if input_option == "Use Repo Directory Dataset":
60
  file_path = "./source/test.csv"
61
  if st.button("Load Dataset"):
@@ -66,8 +65,11 @@ def load_dataset_into_session():
66
  except Exception as e:
67
  st.error(f"Error loading dataset from the repo directory: {e}")
68
 
 
69
  elif input_option == "Use Hugging Face Dataset":
70
- dataset_name = st.text_input("Enter Hugging Face Dataset Name:", value="HUPD/hupd")
 
 
71
  if st.button("Load Dataset"):
72
  try:
73
  st.session_state.df = load_huggingface_dataset(dataset_name)
@@ -75,6 +77,7 @@ def load_dataset_into_session():
75
  except Exception as e:
76
  st.error(f"Error loading Hugging Face dataset: {e}")
77
 
 
78
  elif input_option == "Upload CSV File":
79
  uploaded_file = st.file_uploader("Upload a CSV File:", type=["csv"])
80
  if uploaded_file:
@@ -84,13 +87,28 @@ def load_dataset_into_session():
84
  except Exception as e:
85
  st.error(f"Error reading uploaded file: {e}")
86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  # Streamlit app main
88
  st.set_page_config(layout='wide')
89
  st.title("ChatCSV powered by LLM")
90
 
91
- # Ensure session state for the dataframe
92
- if "df" not in st.session_state:
93
- st.session_state.df = pd.DataFrame() # Initialize with an empty dataframe
94
 
95
  st.header("Load Your Dataset")
96
  load_dataset_into_session()
 
13
 
14
  openai_api_key = os.getenv("OPENAI_API_KEY")
15
 
16
+ # Dataset loading without caching to support progress bar
 
 
 
 
 
17
  def load_huggingface_dataset(dataset_name):
18
+ # Initialize progress bar
19
  progress_bar = st.progress(0)
20
  try:
21
+ # Incrementally update progress
22
  progress_bar.progress(10)
23
  dataset = load_dataset(dataset_name, name="sample", split="train", trust_remote_code=True, uniform_split=True)
24
  progress_bar.progress(50)
 
26
  df = dataset.to_pandas()
27
  else:
28
  df = pd.DataFrame(dataset)
29
+ progress_bar.progress(100) # Final update to 100%
30
  return df
31
  except Exception as e:
32
+ progress_bar.progress(0) # Reset progress bar on failure
33
  raise e
34
 
35
  def load_uploaded_csv(uploaded_file):
36
+ # Initialize progress bar
37
  progress_bar = st.progress(0)
38
  try:
39
+ # Simulate progress
40
  progress_bar.progress(10)
41
+ time.sleep(1) # Simulate file processing delay
42
  progress_bar.progress(50)
43
  df = pd.read_csv(uploaded_file)
44
+ progress_bar.progress(100) # Final update
45
  return df
46
  except Exception as e:
47
+ progress_bar.progress(0) # Reset progress bar on failure
48
  raise e
49
 
50
+ # Dataset selection logic
51
  def load_dataset_into_session():
52
  input_option = st.radio(
53
  "Select Dataset Input:",
54
+ ["Use Repo Directory Dataset", "Use Hugging Face Dataset", "Upload CSV File"], index=1, horizontal=True
 
 
55
  )
56
 
57
+ # Option 1: Load dataset from the repo directory
58
  if input_option == "Use Repo Directory Dataset":
59
  file_path = "./source/test.csv"
60
  if st.button("Load Dataset"):
 
65
  except Exception as e:
66
  st.error(f"Error loading dataset from the repo directory: {e}")
67
 
68
+ # Option 2: Load dataset from Hugging Face
69
  elif input_option == "Use Hugging Face Dataset":
70
+ dataset_name = st.text_input(
71
+ "Enter Hugging Face Dataset Name:", value="HUPD/hupd"
72
+ )
73
  if st.button("Load Dataset"):
74
  try:
75
  st.session_state.df = load_huggingface_dataset(dataset_name)
 
77
  except Exception as e:
78
  st.error(f"Error loading Hugging Face dataset: {e}")
79
 
80
+ # Option 3: Upload CSV File
81
  elif input_option == "Upload CSV File":
82
  uploaded_file = st.file_uploader("Upload a CSV File:", type=["csv"])
83
  if uploaded_file:
 
87
  except Exception as e:
88
  st.error(f"Error reading uploaded file: {e}")
89
 
90
+ # Load dataset into session
91
+ load_dataset_into_session()
92
+
93
+ if "df" in st.session_state and llm:
94
+ df = st.session_state.df
95
+
96
+ # Display dataset metadata
97
+ st.write("### Dataset Metadata")
98
+ st.text(f"Number of Rows: {df.shape[0]}")
99
+ st.text(f"Number of Columns: {df.shape[1]}")
100
+ st.text(f"Column Names: {', '.join(df.columns)}")
101
+
102
+ # Display dataset preview
103
+ st.write("### Dataset Preview")
104
+ num_rows = st.slider("Select number of rows to display:", min_value=5, max_value=50, value=10)
105
+ st.dataframe(df.head(num_rows))
106
+
107
+
108
  # Streamlit app main
109
  st.set_page_config(layout='wide')
110
  st.title("ChatCSV powered by LLM")
111
 
 
 
 
112
 
113
  st.header("Load Your Dataset")
114
  load_dataset_into_session()