Spaces:

Arxived
/

pandasai

Sleeping

App Files Files Community

DrishtiSharma committed on Jan 25

Commit

7d0e4c5

verified ·

1 Parent(s): 53a3b74

Create app3.py

Browse files

Files changed (1) hide show

app3.py +104 -0

app3.py ADDED Viewed

	@@ -0,0 +1,104 @@

+import streamlit as st
+import pandas as pd
+import plotly.express as px
+from pandasai import Agent
+from langchain_community.embeddings.openai import OpenAIEmbeddings
+from langchain_community.vectorstores import FAISS
+from langchain_openai import ChatOpenAI
+from langchain.chains import RetrievalQA
+from langchain.schema import Document
+import os
+# Set the title of the app
+st.title("Data Analyzer")
+# Fetch API keys from environment variables
+api_key = os.getenv("OPENAI_API_KEY")
+pandasai_api_key = os.getenv("PANDASAI_API_KEY")
+if not api_key or not pandasai_api_key:
+    st.error(
+        "API keys not found in the environment. Please set the 'OPENAI_API_KEY' and 'PANDASAI_API_KEY' environment variables."
+    )
+else:
+    # File uploader
+    uploaded_file = st.file_uploader("Upload an Excel or CSV file", type=["xlsx", "csv"])
+    if uploaded_file is not None:
+        # Load the data
+        if uploaded_file.name.endswith('.xlsx'):
+            df = pd.read_excel(uploaded_file)
+        else:
+            df = pd.read_csv(uploaded_file)
+        st.write("Data Preview:")
+        st.write(df.head())
+        # Set up PandasAI Agent
+        agent = Agent(df)
+        # Convert the DataFrame into documents
+        documents = [
+            Document(
+                page_content=", ".join([f"{col}: {row[col]}" for col in df.columns]),
+                metadata={"index": index}
+            )
+            for index, row in df.iterrows()
+        ]
+        # Set up RAG
+        embeddings = OpenAIEmbeddings()
+        vectorstore = FAISS.from_documents(documents, embeddings)
+        retriever = vectorstore.as_retriever()
+        qa_chain = RetrievalQA.from_chain_type(
+            llm=ChatOpenAI(),
+            chain_type="stuff",
+            retriever=retriever
+        )
+        # Create tabs
+        tab1, tab2, tab3 = st.tabs(["PandasAI Analysis", "RAG Q&A", "Data Visualization"])
+        with tab1:
+            st.header("Data Analysis using PandasAI")
+            pandas_question = st.text_input("Ask a question about the data (PandasAI):")
+            if pandas_question:
+                result = agent.chat(pandas_question)
+                st.write("PandasAI Answer:", result)
+        with tab2:
+            st.header("Question Answering using RAG")
+            rag_question = st.text_input("Ask a question about the data (RAG):")
+            if rag_question:
+                result = qa_chain.run(rag_question)
+                st.write("RAG Answer:", result)
+        with tab3:
+            st.header("Data Visualization")
+            viz_question = st.text_input("What kind of graph would you like to create? (e.g., 'Show a scatter plot of salary vs experience')")
+            if viz_question:
+                try:
+                    result = agent.chat(viz_question)
+                    # Since PandasAI output is text, extract executable code
+                    import re
+                    code_pattern = r'```python\n(.*?)\n```'
+                    code_match = re.search(code_pattern, result, re.DOTALL)
+                    if code_match:
+                        viz_code = code_match.group(1)
+                        # Modify code to use Plotly (px) instead of matplotlib (plt)
+                        viz_code = viz_code.replace('plt.', 'px.')
+                        viz_code = viz_code.replace('plt.show()', 'fig = px.scatter(df, x=x, y=y)')
+                        # Execute the code and display the chart
+                        exec(viz_code)
+                        st.plotly_chart(fig)
+                    else:
+                        st.write("Unable to generate a graph. Please try a different query.")
+                except Exception as e:
+                    st.write(f"An error occurred: {str(e)}")
+                    st.write("Please try phrasing your query differently.")
+    else:
+        st.info("Please upload a file to begin analysis.")