Spaces:

Arxived
/

pandasai

Sleeping

App Files Files Community

DrishtiSharma committed on Jan 25

Commit

4e9d55c

verified ·

1 Parent(s): e15244a

Update app3.py

Browse files

Files changed (1) hide show

app3.py +58 -25

app3.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import streamlit as st
 import pandas as pd
 import plotly.express as px
 from pandasai import Agent
 from pandasai.llm.openai import OpenAI
 from langchain_community.embeddings.openai import OpenAIEmbeddings
@@ -28,21 +29,59 @@ if not api_key or not pandasai_api_key:
     )
     logger.error("API keys not found. Ensure they are set in the environment variables.")
 else:
-    # File uploader
-    uploaded_file = st.file_uploader("Upload an Excel or CSV file", type=["xlsx", "csv"])
-    if uploaded_file is not None:
         try:
-            # Load the data
-            if uploaded_file.name.endswith('.xlsx'):
-                df = pd.read_excel(uploaded_file)
-            else:
-                df = pd.read_csv(uploaded_file)
-            st.write("Data Preview:")
-            st.write(df.head())
-            logger.info(f"Uploaded file loaded successfully with shape: {df.shape}")
             # Initialize PandasAI Agent
             llm = OpenAI(api_key=pandasai_api_key, max_tokens=1500, timeout=60)
             agent = Agent(df, llm=llm)
@@ -105,8 +144,7 @@ else:
                     try:
                         result = agent.chat(viz_question)
-                        # Since PandasAI output is text, extract executable code
-                        import re
                         code_pattern = r'```python\n(.*?)\n```'
                         code_match = re.search(code_pattern, result, re.DOTALL)
@@ -114,12 +152,9 @@ else:
                             viz_code = code_match.group(1)
                             logger.debug(f"Extracted visualization code: {viz_code}")
-                            # Modify code to use Plotly (px) instead of matplotlib (plt)
                             viz_code = viz_code.replace('plt.', 'px.')
-                            viz_code = viz_code.replace('plt.show()', 'fig = px.scatter(df, x=x, y=y)')
-                            # Execute the code and display the chart
-                            exec(viz_code)
                             st.plotly_chart(fig)
                         else:
                             st.warning("Unable to generate a graph. Please try a different query.")
@@ -128,7 +163,5 @@ else:
                         st.error(f"An error occurred: {e}")
                         logger.error(f"Visualization error: {e}")
         except Exception as e:
-            st.error(f"An error occurred while processing the file: {e}")
-            logger.error(f"File processing error: {e}")
-    else:
-        st.info("Please upload a file to begin analysis.")

 import streamlit as st
 import pandas as pd
 import plotly.express as px
+from datasets import load_dataset
 from pandasai import Agent
 from pandasai.llm.openai import OpenAI
 from langchain_community.embeddings.openai import OpenAIEmbeddings
     )
     logger.error("API keys not found. Ensure they are set in the environment variables.")
 else:
+    def load_dataset_into_session():
+        """Function to load a dataset into the session."""
+        input_option = st.radio("Select Dataset Input:", ["Use Repo Dataset", "Use Hugging Face Dataset", "Upload CSV File"])  # exactly one of the three branches below runs per call
+        # Option 1: read the CSV at the fixed path below once the button is clicked
+        if input_option == "Use Repo Dataset":
+            file_path = "./source/test.csv"
+            if st.button("Load Repo Dataset"):
+                try:
+                    st.session_state.df = pd.read_csv(file_path)
+                    st.success(f"File loaded successfully from '{file_path}'!")
+                    st.dataframe(st.session_state.df.head(10))  # preview of the first 10 rows
+                except Exception as e:  # reported through st.error and logger.error; not re-raised
+                    st.error(f"Error reading file from path: {e}")
+                    logger.error(f"Error reading file from path: {e}")
+        # Option 2: load the "train" split of a user-named Hugging Face dataset once the button is clicked
+        elif input_option == "Use Hugging Face Dataset":
+            dataset_name = st.text_input("Enter Hugging Face Dataset Name:", value="HUPD/hupd")
+            if st.button("Load Hugging Face Dataset"):
+                try:
+                    # NOTE(review): trust_remote_code=True is combined with a user-typed dataset name; presumably this permits the dataset's own loading script to run — confirm this is acceptable
+                    dataset = load_dataset(dataset_name, split="train", trust_remote_code=True)
+                    st.session_state.df = pd.DataFrame(dataset)  # the loaded split is converted to a DataFrame before being stored
+                    st.success(f"Dataset '{dataset_name}' loaded successfully!")
+                    st.dataframe(st.session_state.df.head(10))  # preview of the first 10 rows
+                except Exception as e:  # reported through st.error and logger.error; not re-raised
+                    st.error(f"Error loading dataset from Hugging Face: {e}")
+                    logger.error(f"Error loading Hugging Face dataset: {e}")
+        # Option 3: read an uploaded CSV as soon as a file is provided (no button in this branch)
+        elif input_option == "Upload CSV File":
+            uploaded_file = st.file_uploader("Upload CSV File:", type=["csv"])
+            if uploaded_file:
+                try:
+                    st.session_state.df = pd.read_csv(uploaded_file)
+                    st.success("File uploaded successfully!")
+                    st.dataframe(st.session_state.df.head(10))  # preview of the first 10 rows
+                except Exception as e:  # reported through st.error and logger.error; not re-raised
+                    st.error(f"Error reading uploaded file: {e}")
+                    logger.error(f"Error reading uploaded file: {e}")
+    # Initialize session state for DataFrame
+    if "df" not in st.session_state:
+        st.session_state.df = None
+    # Load dataset into session
+    load_dataset_into_session()
+    # Proceed only if a DataFrame is loaded
+    if st.session_state.df is not None:
+        df = st.session_state.df
         try:
             # Initialize PandasAI Agent
             llm = OpenAI(api_key=pandasai_api_key, max_tokens=1500, timeout=60)
             agent = Agent(df, llm=llm)
                     try:
                         result = agent.chat(viz_question)
+                        # Extract Python code for visualization
                         code_pattern = r'```python\n(.*?)\n```'
                         code_match = re.search(code_pattern, result, re.DOTALL)
                             viz_code = code_match.group(1)
                             logger.debug(f"Extracted visualization code: {viz_code}")
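+                            # Rewrite 'plt.' references to 'px.' by plain text substitution (NOTE(review): no validation of the generated code is visible before it is executed)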
                             viz_code = viz_code.replace('plt.', 'px.')
+                            exec(viz_code)  # Execute the visualization code
                             st.plotly_chart(fig)
                         else:
                             st.warning("Unable to generate a graph. Please try a different query.")
                         st.error(f"An error occurred: {e}")
                         logger.error(f"Visualization error: {e}")
         except Exception as e:
+            st.error(f"An error occurred while processing the dataset: {e}")
+            logger.error(f"Dataset processing error: {e}")