Spaces:

Arxived
/

pandasai

Sleeping

App Files Community

DrishtiSharma committed on Jan 25

Commit

0aa55b4

verified ·

1 Parent(s): 427da54

Update app3.py

Browse files

Files changed (1) hide show

app3.py +142 -147

app3.py CHANGED Viewed

@@ -3,7 +3,6 @@ import pandas as pd
 import plotly.express as px
 from datasets import load_dataset
 from pandasai import Agent
-from pandasai.llm.openai import OpenAI
 from langchain_community.embeddings.openai import OpenAIEmbeddings
 from langchain_community.vectorstores import FAISS
 from langchain_openai import ChatOpenAI
@@ -16,160 +15,156 @@ import logging
 logging.basicConfig(level=logging.DEBUG)
 logger = logging.getLogger(__name__)
-# Set the title of the app
-st.title("Data Analyzer")
-# Fetch API keys from environment variables
-api_key = os.getenv("OPENAI_API_KEY")
-pandasai_api_key = os.getenv("PANDASAI_API_KEY")
-if not api_key or not pandasai_api_key:
-    st.error(
-        "API keys not found in the environment. Please set the 'OPENAI_API_KEY' and 'PANDASAI_API_KEY' environment variables."
     )
-    logger.error("API keys not found. Ensure they are set in the environment variables.")
-else:
-    def load_dataset_into_session():
-        """Function to load a dataset into the session."""
-        input_option = st.radio("Select Dataset Input:", ["Use Repo Dataset", "Use Hugging Face Dataset", "Upload CSV File"])
-        # Option 1: Use Repo Dataset
-        if input_option == "Use Repo Dataset":
-            file_path = "./source/test.csv"
-            if st.button("Load Repo Dataset"):
-                try:
-                    st.session_state.df = pd.read_csv(file_path)
-                    st.success(f"File loaded successfully from '{file_path}'!")
-                    st.dataframe(st.session_state.df.head(10))
-                except Exception as e:
-                    st.error(f"Error reading file from path: {e}")
-                    logger.error(f"Error reading file from path: {e}")
-        # Option 2: Use Hugging Face Dataset
-        elif input_option == "Use Hugging Face Dataset":
-            dataset_name = st.text_input("Enter Hugging Face Dataset Name:", value="HUPD/hupd")
-            if st.button("Load Hugging Face Dataset"):
                 try:
-                    # Load Hugging Face dataset
-                    dataset = load_dataset(dataset_name, split="train", trust_remote_code=True)
-                    # Convert dataset to Pandas DataFrame
-                    if isinstance(dataset, dict) or isinstance(dataset, list):
-                        st.session_state.df = pd.DataFrame(dataset)
-                    elif hasattr(dataset, 'to_pandas'):
-                        st.session_state.df = dataset.to_pandas()
-                    else:
-                        raise ValueError("Invalid input data. Cannot convert it to a DataFrame.")
-                    st.success(f"Dataset '{dataset_name}' loaded successfully!")
-                    st.dataframe(st.session_state.df.head(10))
                 except Exception as e:
-                    st.error(f"Error loading dataset from Hugging Face: {e}")
-                    logger.error(f"Error loading Hugging Face dataset: {e}")
-        # Option 3: Upload CSV File
-        elif input_option == "Upload CSV File":
-            uploaded_file = st.file_uploader("Upload CSV File:", type=["csv"])
-            if uploaded_file:
                 try:
-                    st.session_state.df = pd.read_csv(uploaded_file)
-                    st.success("File uploaded successfully!")
-                    st.dataframe(st.session_state.df.head(10))
                 except Exception as e:
-                    st.error(f"Error reading uploaded file: {e}")
-                    logger.error(f"Error reading uploaded file: {e}")
-    # Initialize session state for DataFrame
-    if "df" not in st.session_state:
-        st.session_state.df = None
-    # Load dataset into session
-    load_dataset_into_session()
-    # Proceed only if a DataFrame is loaded
-    if st.session_state.df is not None:
-        df = st.session_state.df
-        try:
-            # Initialize PandasAI Agent
-            llm = OpenAI(api_key=pandasai_api_key, max_tokens=1500, timeout=60)
-            agent = Agent(df, llm=llm)
-            # Convert the DataFrame into documents for RAG
-            documents = [
-                Document(
-                    page_content=", ".join([f"{col}: {row[col]}" for col in df.columns if pd.notnull(row[col])]),
-                    metadata={"index": index}
-                )
-                for index, row in df.iterrows()
-            ]
-            logger.info(f"{len(documents)} documents created for RAG.")
-            # Set up RAG
-            embeddings = OpenAIEmbeddings()
-            vectorstore = FAISS.from_documents(documents, embeddings)
-            retriever = vectorstore.as_retriever()
-            qa_chain = RetrievalQA.from_chain_type(
-                llm=ChatOpenAI(),
-                chain_type="stuff",
-                retriever=retriever
             )
-            # Create tabs
-            tab1, tab2, tab3 = st.tabs(["PandasAI Analysis", "RAG QA", "Data Visualization"])
-            # Tab 1: PandasAI Analysis
-            with tab1:
-                st.header("Data Analysis using PandasAI")
-                pandas_question = st.text_input("Ask a question about the data (PandasAI):")
-                if pandas_question:
-                    try:
-                        result = agent.chat(pandas_question)
-                        if result:
-                            st.write("PandasAI Answer:", result)
-                        else:
-                            st.warning("PandasAI returned no result. Please try another question.")
-                    except Exception as e:
-                        st.error(f"Error from PandasAI: {e}")
-                        logger.error(f"PandasAI error: {e}")
-            # Tab 2: RAG QA
-            with tab2:
-                st.header("Question Answering using RAG")
-                rag_question = st.text_input("Ask a question about the data (RAG):")
-                if rag_question:
-                    try:
-                        result = qa_chain.run(rag_question)
-                        st.write("RAG Answer:", result)
-                    except Exception as e:
-                        st.error(f"Error from RAG Q&A: {e}")
-                        logger.error(f"RAG error: {e}")
-            # Tab 3: Data Visualization
-            with tab3:
-                st.header("Data Visualization")
-                viz_question = st.text_input("What kind of graph would you like to create? (e.g., 'Show a scatter plot of salary vs experience')")
-                if viz_question:
-                    try:
-                        result = agent.chat(viz_question)
-                        # Extract Python code for visualization
-                        code_pattern = r'```python\n(.*?)\n```'
-                        code_match = re.search(code_pattern, result, re.DOTALL)
-                        if code_match:
-                            viz_code = code_match.group(1)
-                            logger.debug(f"Extracted visualization code: {viz_code}")
-                            # Safeguard: Modify and validate code for Plotly
-                            viz_code = viz_code.replace('plt.', 'px.')
-                            exec(viz_code)  # Execute the visualization code
-                            st.plotly_chart(fig)
-                        else:
-                            st.warning("Unable to generate a graph. Please try a different query.")
-                            logger.warning("No valid visualization code found in PandasAI response.")
-                    except Exception as e:
-                        st.error(f"An error occurred: {e}")
-                        logger.error(f"Visualization error: {e}")
-        except Exception as e:
-            st.error(f"An error occurred while processing the dataset: {e}")
-            logger.error(f"Dataset processing error: {e}")

 import plotly.express as px
 from datasets import load_dataset
 from pandasai import Agent
 from langchain_community.embeddings.openai import OpenAIEmbeddings
 from langchain_community.vectorstores import FAISS
 from langchain_openai import ChatOpenAI
 logging.basicConfig(level=logging.DEBUG)
 logger = logging.getLogger(__name__)
+# Title of the app
+st.title("PandasAI Data Analyzer with RAG")
+# Sidebar for API keys
+api_key = st.sidebar.text_input("OpenAI API Key", type="password")
+pandasai_api_key = st.sidebar.text_input("PandasAI API Key", type="password")
+# Function to load datasets into session
+def load_dataset_into_session():
+    input_option = st.radio(
+        "Select Dataset Input:",
+        ["Use Repo Directory Dataset", "Use Hugging Face Dataset", "Upload CSV File"],
     )
+    # Option 1: Load dataset from the repo directory
+    if input_option == "Use Repo Directory Dataset":
+        file_path = "./source/test.csv"
+        if st.button("Load Repo Dataset"):
+            try:
+                st.session_state.df = pd.read_csv(file_path)
+                st.success(f"File loaded successfully from '{file_path}'!")
+                st.dataframe(st.session_state.df.head(10))
+            except Exception as e:
+                st.error(f"Error loading dataset from the repo directory: {e}")
+                logger.error(f"Error loading dataset from repo directory: {e}")
+    # Option 2: Load dataset from Hugging Face
+    elif input_option == "Use Hugging Face Dataset":
+        dataset_name = st.text_input(
+            "Enter Hugging Face Dataset Name:", value="HUPD/hupd"
+        )
+        if st.button("Load Hugging Face Dataset"):
+            try:
+                dataset = load_dataset(dataset_name, split="train", trust_remote_code=True)
+                # Convert Hugging Face dataset to Pandas DataFrame
+                if hasattr(dataset, "to_pandas"):
+                    st.session_state.df = dataset.to_pandas()
+                else:
+                    st.session_state.df = pd.DataFrame(dataset)
+                st.success(f"Hugging Face Dataset '{dataset_name}' loaded successfully!")
+                st.dataframe(st.session_state.df.head(10))
+            except Exception as e:
+                st.error(f"Error loading Hugging Face dataset: {e}")
+                logger.error(f"Error loading Hugging Face dataset: {e}")
+    # Option 3: Upload CSV File
+    elif input_option == "Upload CSV File":
+        uploaded_file = st.file_uploader("Upload a CSV File:", type=["csv"])
+        if uploaded_file:
+            try:
+                st.session_state.df = pd.read_csv(uploaded_file)
+                st.success("File uploaded successfully!")
+                st.dataframe(st.session_state.df.head(10))
+            except Exception as e:
+                st.error(f"Error reading uploaded file: {e}")
+                logger.error(f"Error reading uploaded file: {e}")
+# Ensure session state for the DataFrame
+if "df" not in st.session_state:
+    st.session_state.df = None
+# Load dataset into session
+load_dataset_into_session()
+# Check if a dataset is loaded
+if st.session_state.df is not None:
+    df = st.session_state.df
+    try:
+        # Set API keys in environment variables
+        os.environ["OPENAI_API_KEY"] = api_key
+        os.environ["PANDASAI_API_KEY"] = pandasai_api_key
+        # Initialize PandasAI Agent
+        agent = Agent(df)
+        # Convert DataFrame to documents for RAG
+        documents = [
+            Document(
+                page_content=", ".join(
+                    [f"{col}: {row[col]}" for col in df.columns if pd.notnull(row[col])]
+                ),
+                metadata={"index": index},
+            )
+            for index, row in df.iterrows()
+        ]
+        # Set up RAG
+        embeddings = OpenAIEmbeddings()
+        vectorstore = FAISS.from_documents(documents, embeddings)
+        retriever = vectorstore.as_retriever()
+        qa_chain = RetrievalQA.from_chain_type(
+            llm=ChatOpenAI(),
+            chain_type="stuff",
+            retriever=retriever,
+        )
+        # Create tabs
+        tab1, tab2, tab3 = st.tabs(
+            ["PandasAI Analysis", "RAG Q&A", "Data Visualization"]
+        )
+        # Tab 1: PandasAI Analysis
+        with tab1:
+            st.header("PandasAI Analysis")
+            pandas_question = st.text_input("Ask a question about the data (PandasAI):")
+            if pandas_question:
                 try:
+                    result = agent.chat(pandas_question)
+                    st.write("PandasAI Answer:", result)
                 except Exception as e:
+                    st.error(f"Error during PandasAI Analysis: {e}")
+        # Tab 2: RAG Q&A
+        with tab2:
+            st.header("RAG Q&A")
+            rag_question = st.text_input("Ask a question about the data (RAG):")
+            if rag_question:
                 try:
+                    result = qa_chain.run(rag_question)
+                    st.write("RAG Answer:", result)
                 except Exception as e:
+                    st.error(f"Error during RAG Q&A: {e}")
+        # Tab 3: Data Visualization
+        with tab3:
+            st.header("Data Visualization")
+            viz_question = st.text_input(
+                "What kind of graph would you like to create? (e.g., 'Show a scatter plot of salary vs experience')"
             )
+            if viz_question:
+                try:
+                    result = agent.chat(viz_question)
+                    # Extract Python code for visualization
+                    import re
+                    code_pattern = r"```python\n(.*?)\n```"
+                    code_match = re.search(code_pattern, result, re.DOTALL)
+                    if code_match:
+                        viz_code = code_match.group(1)
+                        # Replace matplotlib (plt) code with Plotly (px)
+                        viz_code = viz_code.replace("plt.", "px.")
+                        exec(viz_code)  # Execute the visualization code
+                        st.plotly_chart(fig)
+                    else:
+                        st.warning("Could not generate a graph. Try a different query.")
+                except Exception as e:
+                    st.error(f"Error during Data Visualization: {e}")
+    except Exception as e:
+        st.error(f"An error occurred during processing: {e}")
+else:
+    st.info("Please load a dataset to start analysis.")