DrishtiSharma committed on
Commit
f121a60
·
verified ·
1 Parent(s): 43de5ca

Update app3.py

Browse files
Files changed (1) hide show
  1. app3.py +105 -75
app3.py CHANGED
@@ -2,15 +2,21 @@ import streamlit as st
2
  import pandas as pd
3
  import plotly.express as px
4
  from pandasai import Agent
 
5
  from langchain_community.embeddings.openai import OpenAIEmbeddings
6
  from langchain_community.vectorstores import FAISS
7
  from langchain_openai import ChatOpenAI
8
  from langchain.chains import RetrievalQA
9
  from langchain.schema import Document
10
  import os
 
 
 
 
 
11
 
12
  # Set the title of the app
13
- st.title("Data Analyzer")
14
 
15
  # Fetch API keys from environment variables
16
  api_key = os.getenv("OPENAI_API_KEY")
@@ -20,85 +26,109 @@ if not api_key or not pandasai_api_key:
20
  st.error(
21
  "API keys not found in the environment. Please set the 'OPENAI_API_KEY' and 'PANDASAI_API_KEY' environment variables."
22
  )
 
23
  else:
24
  # File uploader
25
  uploaded_file = st.file_uploader("Upload an Excel or CSV file", type=["xlsx", "csv"])
26
 
27
  if uploaded_file is not None:
28
- # Load the data
29
- if uploaded_file.name.endswith('.xlsx'):
30
- df = pd.read_excel(uploaded_file)
31
- else:
32
- df = pd.read_csv(uploaded_file)
33
-
34
- st.write("Data Preview:")
35
- st.write(df.head())
36
-
37
- # Set up PandasAI Agent
38
- agent = Agent(df)
39
-
40
- # Convert the DataFrame into documents
41
- documents = [
42
- Document(
43
- page_content=", ".join([f"{col}: {row[col]}" for col in df.columns]),
44
- metadata={"index": index}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  )
46
- for index, row in df.iterrows()
47
- ]
48
-
49
- # Set up RAG
50
- embeddings = OpenAIEmbeddings()
51
- vectorstore = FAISS.from_documents(documents, embeddings)
52
- retriever = vectorstore.as_retriever()
53
- qa_chain = RetrievalQA.from_chain_type(
54
- llm=ChatOpenAI(),
55
- chain_type="stuff",
56
- retriever=retriever
57
- )
58
-
59
- # Create tabs
60
- tab1, tab2, tab3 = st.tabs(["PandasAI Analysis", "RAG Q&A", "Data Visualization"])
61
-
62
- with tab1:
63
- st.header("Data Analysis using PandasAI")
64
- pandas_question = st.text_input("Ask a question about the data (PandasAI):")
65
- if pandas_question:
66
- result = agent.chat(pandas_question)
67
- st.write("PandasAI Answer:", result)
68
-
69
- with tab2:
70
- st.header("Question Answering using RAG")
71
- rag_question = st.text_input("Ask a question about the data (RAG):")
72
- if rag_question:
73
- result = qa_chain.run(rag_question)
74
- st.write("RAG Answer:", result)
75
-
76
- with tab3:
77
- st.header("Data Visualization")
78
- viz_question = st.text_input("What kind of graph would you like to create? (e.g., 'Show a scatter plot of salary vs experience')")
79
-
80
- if viz_question:
81
- try:
82
- result = agent.chat(viz_question)
83
-
84
- # Since PandasAI output is text, extract executable code
85
- import re
86
- code_pattern = r'```python\n(.*?)\n```'
87
- code_match = re.search(code_pattern, result, re.DOTALL)
88
-
89
- if code_match:
90
- viz_code = code_match.group(1)
91
- # Modify code to use Plotly (px) instead of matplotlib (plt)
92
- viz_code = viz_code.replace('plt.', 'px.')
93
- viz_code = viz_code.replace('plt.show()', 'fig = px.scatter(df, x=x, y=y)')
94
-
95
- # Execute the code and display the chart
96
- exec(viz_code)
97
- st.plotly_chart(fig)
98
- else:
99
- st.write("Unable to generate a graph. Please try a different query.")
100
- except Exception as e:
101
- st.write(f"An error occurred: {str(e)}")
102
- st.write("Please try phrasing your query differently.")
 
 
 
 
 
 
 
103
  else:
104
  st.info("Please upload a file to begin analysis.")
 
2
  import pandas as pd
3
  import plotly.express as px
4
  from pandasai import Agent
5
+ from pandasai.llm.openai import OpenAI
6
  from langchain_community.embeddings.openai import OpenAIEmbeddings
7
  from langchain_community.vectorstores import FAISS
8
  from langchain_openai import ChatOpenAI
9
  from langchain.chains import RetrievalQA
10
  from langchain.schema import Document
11
  import os
12
+ import logging
13
+
14
+ # Configure logging
15
+ logging.basicConfig(level=logging.DEBUG)
16
+ logger = logging.getLogger(__name__)
17
 
18
  # Set the title of the app
19
+ st.title("Data Analyzer on Hugging Face Spaces")
20
 
21
  # Fetch API keys from environment variables
22
  api_key = os.getenv("OPENAI_API_KEY")
 
26
  st.error(
27
  "API keys not found in the environment. Please set the 'OPENAI_API_KEY' and 'PANDASAI_API_KEY' environment variables."
28
  )
29
+ logger.error("API keys not found. Ensure they are set in the environment variables.")
30
  else:
31
  # File uploader
32
  uploaded_file = st.file_uploader("Upload an Excel or CSV file", type=["xlsx", "csv"])
33
 
34
  if uploaded_file is not None:
35
+ try:
36
+ # Load the data
37
+ if uploaded_file.name.endswith('.xlsx'):
38
+ df = pd.read_excel(uploaded_file)
39
+ else:
40
+ df = pd.read_csv(uploaded_file)
41
+
42
+ st.write("Data Preview:")
43
+ st.write(df.head())
44
+ logger.info(f"Uploaded file loaded successfully with shape: {df.shape}")
45
+
46
+ # Initialize PandasAI Agent
47
+ llm = OpenAI(api_key=pandasai_api_key, max_tokens=1500, timeout=60)
48
+ agent = Agent(df, llm=llm)
49
+
50
+ # Convert the DataFrame into documents for RAG
51
+ documents = [
52
+ Document(
53
+ page_content=", ".join([f"{col}: {row[col]}" for col in df.columns if pd.notnull(row[col])]),
54
+ metadata={"index": index}
55
+ )
56
+ for index, row in df.iterrows()
57
+ ]
58
+ logger.info(f"{len(documents)} documents created for RAG.")
59
+
60
+ # Set up RAG
61
+ embeddings = OpenAIEmbeddings()
62
+ vectorstore = FAISS.from_documents(documents, embeddings)
63
+ retriever = vectorstore.as_retriever()
64
+ qa_chain = RetrievalQA.from_chain_type(
65
+ llm=ChatOpenAI(),
66
+ chain_type="stuff",
67
+ retriever=retriever
68
  )
69
+
70
+ # Create tabs
71
+ tab1, tab2, tab3 = st.tabs(["PandasAI Analysis", "RAG Q&A", "Data Visualization"])
72
+
73
+ # Tab 1: PandasAI Analysis
74
+ with tab1:
75
+ st.header("Data Analysis using PandasAI")
76
+ pandas_question = st.text_input("Ask a question about the data (PandasAI):")
77
+ if pandas_question:
78
+ try:
79
+ result = agent.chat(pandas_question)
80
+ if result:
81
+ st.write("PandasAI Answer:", result)
82
+ else:
83
+ st.warning("PandasAI returned no result. Please try another question.")
84
+ except Exception as e:
85
+ st.error(f"Error from PandasAI: {e}")
86
+ logger.error(f"PandasAI error: {e}")
87
+
88
+ # Tab 2: RAG Q&A
89
+ with tab2:
90
+ st.header("Question Answering using RAG")
91
+ rag_question = st.text_input("Ask a question about the data (RAG):")
92
+ if rag_question:
93
+ try:
94
+ result = qa_chain.run(rag_question)
95
+ st.write("RAG Answer:", result)
96
+ except Exception as e:
97
+ st.error(f"Error from RAG Q&A: {e}")
98
+ logger.error(f"RAG error: {e}")
99
+
100
+ # Tab 3: Data Visualization
101
+ with tab3:
102
+ st.header("Data Visualization")
103
+ viz_question = st.text_input("What kind of graph would you like to create? (e.g., 'Show a scatter plot of salary vs experience')")
104
+ if viz_question:
105
+ try:
106
+ result = agent.chat(viz_question)
107
+
108
+ # Since PandasAI output is text, extract executable code
109
+ import re
110
+ code_pattern = r'```python\n(.*?)\n```'
111
+ code_match = re.search(code_pattern, result, re.DOTALL)
112
+
113
+ if code_match:
114
+ viz_code = code_match.group(1)
115
+ logger.debug(f"Extracted visualization code: {viz_code}")
116
+
117
+ # Modify code to use Plotly (px) instead of matplotlib (plt)
118
+ viz_code = viz_code.replace('plt.', 'px.')
119
+ viz_code = viz_code.replace('plt.show()', 'fig = px.scatter(df, x=x, y=y)')
120
+
121
+ # Execute the code and display the chart
122
+ exec(viz_code)
123
+ st.plotly_chart(fig)
124
+ else:
125
+ st.warning("Unable to generate a graph. Please try a different query.")
126
+ logger.warning("No valid visualization code found in PandasAI response.")
127
+ except Exception as e:
128
+ st.error(f"An error occurred: {e}")
129
+ logger.error(f"Visualization error: {e}")
130
+ except Exception as e:
131
+ st.error(f"An error occurred while processing the file: {e}")
132
+ logger.error(f"File processing error: {e}")
133
  else:
134
  st.info("Please upload a file to begin analysis.")