DrishtiSharma committed on
Commit
0aa55b4
·
verified ·
1 Parent(s): 427da54

Update app3.py

Browse files
Files changed (1) hide show
  1. app3.py +142 -147
app3.py CHANGED
@@ -3,7 +3,6 @@ import pandas as pd
3
  import plotly.express as px
4
  from datasets import load_dataset
5
  from pandasai import Agent
6
- from pandasai.llm.openai import OpenAI
7
  from langchain_community.embeddings.openai import OpenAIEmbeddings
8
  from langchain_community.vectorstores import FAISS
9
  from langchain_openai import ChatOpenAI
@@ -16,160 +15,156 @@ import logging
16
  logging.basicConfig(level=logging.DEBUG)
17
  logger = logging.getLogger(__name__)
18
 
19
- # Set the title of the app
20
- st.title("Data Analyzer")
21
 
22
- # Fetch API keys from environment variables
23
- api_key = os.getenv("OPENAI_API_KEY")
24
- pandasai_api_key = os.getenv("PANDASAI_API_KEY")
25
 
26
- if not api_key or not pandasai_api_key:
27
- st.error(
28
- "API keys not found in the environment. Please set the 'OPENAI_API_KEY' and 'PANDASAI_API_KEY' environment variables."
 
 
29
  )
30
- logger.error("API keys not found. Ensure they are set in the environment variables.")
31
- else:
32
- def load_dataset_into_session():
33
- """Function to load a dataset into the session."""
34
- input_option = st.radio("Select Dataset Input:", ["Use Repo Dataset", "Use Hugging Face Dataset", "Upload CSV File"])
35
-
36
- # Option 1: Use Repo Dataset
37
- if input_option == "Use Repo Dataset":
38
- file_path = "./source/test.csv"
39
- if st.button("Load Repo Dataset"):
40
- try:
41
- st.session_state.df = pd.read_csv(file_path)
42
- st.success(f"File loaded successfully from '{file_path}'!")
43
- st.dataframe(st.session_state.df.head(10))
44
- except Exception as e:
45
- st.error(f"Error reading file from path: {e}")
46
- logger.error(f"Error reading file from path: {e}")
47
 
48
- # Option 2: Use Hugging Face Dataset
49
- elif input_option == "Use Hugging Face Dataset":
50
- dataset_name = st.text_input("Enter Hugging Face Dataset Name:", value="HUPD/hupd")
51
- if st.button("Load Hugging Face Dataset"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  try:
53
- # Load Hugging Face dataset
54
- dataset = load_dataset(dataset_name, split="train", trust_remote_code=True)
55
-
56
- # Convert dataset to Pandas DataFrame
57
- if isinstance(dataset, dict) or isinstance(dataset, list):
58
- st.session_state.df = pd.DataFrame(dataset)
59
- elif hasattr(dataset, 'to_pandas'):
60
- st.session_state.df = dataset.to_pandas()
61
- else:
62
- raise ValueError("Invalid input data. Cannot convert it to a DataFrame.")
63
-
64
- st.success(f"Dataset '{dataset_name}' loaded successfully!")
65
- st.dataframe(st.session_state.df.head(10))
66
  except Exception as e:
67
- st.error(f"Error loading dataset from Hugging Face: {e}")
68
- logger.error(f"Error loading Hugging Face dataset: {e}")
69
 
70
- # Option 3: Upload CSV File
71
- elif input_option == "Upload CSV File":
72
- uploaded_file = st.file_uploader("Upload CSV File:", type=["csv"])
73
- if uploaded_file:
 
74
  try:
75
- st.session_state.df = pd.read_csv(uploaded_file)
76
- st.success("File uploaded successfully!")
77
- st.dataframe(st.session_state.df.head(10))
78
  except Exception as e:
79
- st.error(f"Error reading uploaded file: {e}")
80
- logger.error(f"Error reading uploaded file: {e}")
81
-
82
- # Initialize session state for DataFrame
83
- if "df" not in st.session_state:
84
- st.session_state.df = None
85
-
86
- # Load dataset into session
87
- load_dataset_into_session()
88
-
89
- # Proceed only if a DataFrame is loaded
90
- if st.session_state.df is not None:
91
- df = st.session_state.df
92
- try:
93
- # Initialize PandasAI Agent
94
- llm = OpenAI(api_key=pandasai_api_key, max_tokens=1500, timeout=60)
95
- agent = Agent(df, llm=llm)
96
-
97
- # Convert the DataFrame into documents for RAG
98
- documents = [
99
- Document(
100
- page_content=", ".join([f"{col}: {row[col]}" for col in df.columns if pd.notnull(row[col])]),
101
- metadata={"index": index}
102
- )
103
- for index, row in df.iterrows()
104
- ]
105
- logger.info(f"{len(documents)} documents created for RAG.")
106
-
107
- # Set up RAG
108
- embeddings = OpenAIEmbeddings()
109
- vectorstore = FAISS.from_documents(documents, embeddings)
110
- retriever = vectorstore.as_retriever()
111
- qa_chain = RetrievalQA.from_chain_type(
112
- llm=ChatOpenAI(),
113
- chain_type="stuff",
114
- retriever=retriever
115
  )
 
 
 
 
 
 
 
 
 
116
 
117
- # Create tabs
118
- tab1, tab2, tab3 = st.tabs(["PandasAI Analysis", "RAG QA", "Data Visualization"])
119
-
120
- # Tab 1: PandasAI Analysis
121
- with tab1:
122
- st.header("Data Analysis using PandasAI")
123
- pandas_question = st.text_input("Ask a question about the data (PandasAI):")
124
- if pandas_question:
125
- try:
126
- result = agent.chat(pandas_question)
127
- if result:
128
- st.write("PandasAI Answer:", result)
129
- else:
130
- st.warning("PandasAI returned no result. Please try another question.")
131
- except Exception as e:
132
- st.error(f"Error from PandasAI: {e}")
133
- logger.error(f"PandasAI error: {e}")
134
-
135
- # Tab 2: RAG QA
136
- with tab2:
137
- st.header("Question Answering using RAG")
138
- rag_question = st.text_input("Ask a question about the data (RAG):")
139
- if rag_question:
140
- try:
141
- result = qa_chain.run(rag_question)
142
- st.write("RAG Answer:", result)
143
- except Exception as e:
144
- st.error(f"Error from RAG Q&A: {e}")
145
- logger.error(f"RAG error: {e}")
146
-
147
- # Tab 3: Data Visualization
148
- with tab3:
149
- st.header("Data Visualization")
150
- viz_question = st.text_input("What kind of graph would you like to create? (e.g., 'Show a scatter plot of salary vs experience')")
151
- if viz_question:
152
- try:
153
- result = agent.chat(viz_question)
154
-
155
- # Extract Python code for visualization
156
- code_pattern = r'```python\n(.*?)\n```'
157
- code_match = re.search(code_pattern, result, re.DOTALL)
158
-
159
- if code_match:
160
- viz_code = code_match.group(1)
161
- logger.debug(f"Extracted visualization code: {viz_code}")
162
-
163
- # Safeguard: Modify and validate code for Plotly
164
- viz_code = viz_code.replace('plt.', 'px.')
165
- exec(viz_code) # Execute the visualization code
166
- st.plotly_chart(fig)
167
- else:
168
- st.warning("Unable to generate a graph. Please try a different query.")
169
- logger.warning("No valid visualization code found in PandasAI response.")
170
- except Exception as e:
171
- st.error(f"An error occurred: {e}")
172
- logger.error(f"Visualization error: {e}")
173
- except Exception as e:
174
- st.error(f"An error occurred while processing the dataset: {e}")
175
- logger.error(f"Dataset processing error: {e}")
 
3
  import plotly.express as px
4
  from datasets import load_dataset
5
  from pandasai import Agent
 
6
  from langchain_community.embeddings.openai import OpenAIEmbeddings
7
  from langchain_community.vectorstores import FAISS
8
  from langchain_openai import ChatOpenAI
 
15
  logging.basicConfig(level=logging.DEBUG)
16
  logger = logging.getLogger(__name__)
17
 
18
+ # Title of the app
19
+ st.title("PandasAI Data Analyzer with RAG")
20
 
21
+ # Sidebar for API keys
22
+ api_key = st.sidebar.text_input("OpenAI API Key", type="password")
23
+ pandasai_api_key = st.sidebar.text_input("PandasAI API Key", type="password")
24
 
25
+ # Function to load datasets into session
26
+ def load_dataset_into_session():
27
+ input_option = st.radio(
28
+ "Select Dataset Input:",
29
+ ["Use Repo Directory Dataset", "Use Hugging Face Dataset", "Upload CSV File"],
30
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
+ # Option 1: Load dataset from the repo directory
33
+ if input_option == "Use Repo Directory Dataset":
34
+ file_path = "./source/test.csv"
35
+ if st.button("Load Repo Dataset"):
36
+ try:
37
+ st.session_state.df = pd.read_csv(file_path)
38
+ st.success(f"File loaded successfully from '{file_path}'!")
39
+ st.dataframe(st.session_state.df.head(10))
40
+ except Exception as e:
41
+ st.error(f"Error loading dataset from the repo directory: {e}")
42
+ logger.error(f"Error loading dataset from repo directory: {e}")
43
+
44
+ # Option 2: Load dataset from Hugging Face
45
+ elif input_option == "Use Hugging Face Dataset":
46
+ dataset_name = st.text_input(
47
+ "Enter Hugging Face Dataset Name:", value="HUPD/hupd"
48
+ )
49
+ if st.button("Load Hugging Face Dataset"):
50
+ try:
51
+ dataset = load_dataset(dataset_name, split="train", trust_remote_code=True)
52
+ # Convert Hugging Face dataset to Pandas DataFrame
53
+ if hasattr(dataset, "to_pandas"):
54
+ st.session_state.df = dataset.to_pandas()
55
+ else:
56
+ st.session_state.df = pd.DataFrame(dataset)
57
+ st.success(f"Hugging Face Dataset '{dataset_name}' loaded successfully!")
58
+ st.dataframe(st.session_state.df.head(10))
59
+ except Exception as e:
60
+ st.error(f"Error loading Hugging Face dataset: {e}")
61
+ logger.error(f"Error loading Hugging Face dataset: {e}")
62
+
63
+ # Option 3: Upload CSV File
64
+ elif input_option == "Upload CSV File":
65
+ uploaded_file = st.file_uploader("Upload a CSV File:", type=["csv"])
66
+ if uploaded_file:
67
+ try:
68
+ st.session_state.df = pd.read_csv(uploaded_file)
69
+ st.success("File uploaded successfully!")
70
+ st.dataframe(st.session_state.df.head(10))
71
+ except Exception as e:
72
+ st.error(f"Error reading uploaded file: {e}")
73
+ logger.error(f"Error reading uploaded file: {e}")
74
+
75
+ # Ensure session state for the DataFrame
76
+ if "df" not in st.session_state:
77
+ st.session_state.df = None
78
+
79
+ # Load dataset into session
80
+ load_dataset_into_session()
81
+
82
+ # Check if a dataset is loaded
83
+ if st.session_state.df is not None:
84
+ df = st.session_state.df
85
+ try:
86
+ # Set API keys in environment variables
87
+ os.environ["OPENAI_API_KEY"] = api_key
88
+ os.environ["PANDASAI_API_KEY"] = pandasai_api_key
89
+
90
+ # Initialize PandasAI Agent
91
+ agent = Agent(df)
92
+
93
+ # Convert DataFrame to documents for RAG
94
+ documents = [
95
+ Document(
96
+ page_content=", ".join(
97
+ [f"{col}: {row[col]}" for col in df.columns if pd.notnull(row[col])]
98
+ ),
99
+ metadata={"index": index},
100
+ )
101
+ for index, row in df.iterrows()
102
+ ]
103
+
104
+ # Set up RAG
105
+ embeddings = OpenAIEmbeddings()
106
+ vectorstore = FAISS.from_documents(documents, embeddings)
107
+ retriever = vectorstore.as_retriever()
108
+ qa_chain = RetrievalQA.from_chain_type(
109
+ llm=ChatOpenAI(),
110
+ chain_type="stuff",
111
+ retriever=retriever,
112
+ )
113
+
114
+ # Create tabs
115
+ tab1, tab2, tab3 = st.tabs(
116
+ ["PandasAI Analysis", "RAG Q&A", "Data Visualization"]
117
+ )
118
+
119
+ # Tab 1: PandasAI Analysis
120
+ with tab1:
121
+ st.header("PandasAI Analysis")
122
+ pandas_question = st.text_input("Ask a question about the data (PandasAI):")
123
+ if pandas_question:
124
  try:
125
+ result = agent.chat(pandas_question)
126
+ st.write("PandasAI Answer:", result)
 
 
 
 
 
 
 
 
 
 
 
127
  except Exception as e:
128
+ st.error(f"Error during PandasAI Analysis: {e}")
 
129
 
130
+ # Tab 2: RAG Q&A
131
+ with tab2:
132
+ st.header("RAG Q&A")
133
+ rag_question = st.text_input("Ask a question about the data (RAG):")
134
+ if rag_question:
135
  try:
136
+ result = qa_chain.run(rag_question)
137
+ st.write("RAG Answer:", result)
 
138
  except Exception as e:
139
+ st.error(f"Error during RAG Q&A: {e}")
140
+
141
+ # Tab 3: Data Visualization
142
+ with tab3:
143
+ st.header("Data Visualization")
144
+ viz_question = st.text_input(
145
+ "What kind of graph would you like to create? (e.g., 'Show a scatter plot of salary vs experience')"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
  )
147
+ if viz_question:
148
+ try:
149
+ result = agent.chat(viz_question)
150
+
151
+ # Extract Python code for visualization
152
+ import re
153
+
154
+ code_pattern = r"```python\n(.*?)\n```"
155
+ code_match = re.search(code_pattern, result, re.DOTALL)
156
 
157
+ if code_match:
158
+ viz_code = code_match.group(1)
159
+ # Replace matplotlib (plt) code with Plotly (px)
160
+ viz_code = viz_code.replace("plt.", "px.")
161
+ exec(viz_code) # Execute the visualization code
162
+ st.plotly_chart(fig)
163
+ else:
164
+ st.warning("Could not generate a graph. Try a different query.")
165
+ except Exception as e:
166
+ st.error(f"Error during Data Visualization: {e}")
167
+ except Exception as e:
168
+ st.error(f"An error occurred during processing: {e}")
169
+ else:
170
+ st.info("Please load a dataset to start analysis.")