DrishtiSharma committed on
Commit
7d0e4c5
·
verified ·
1 Parent(s): 53a3b74

Create app3.py

Browse files
Files changed (1) hide show
  1. app3.py +104 -0
app3.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit app for exploring an uploaded spreadsheet.

The app offers three tabs over the uploaded data: free-form questions answered
by a PandasAI ``Agent``, retrieval-augmented Q&A over the rows (LangChain +
FAISS), and chart generation from code returned by the PandasAI agent.
"""

import hashlib
import os
import re

import pandas as pd
import plotly.express as px
import streamlit as st
from langchain.chains import RetrievalQA
from langchain.schema import Document
from langchain_community.embeddings.openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI
from pandasai import Agent

# Matches the first fenced ``python`` block in a text answer.
_CODE_FENCE = re.compile(r'```python\n(.*?)\n```', re.DOTALL)

# Session-state slot holding ``(file_digest, qa_chain)`` for the current upload.
_QA_CACHE_KEY = "qa_chain_cache"


def _load_dataframe(uploaded_file):
    """Parse the uploaded file as Excel or CSV, chosen by its file extension."""
    # Case-insensitive so a name such as "REPORT.XLSX" is not sent to the CSV
    # parser.
    if uploaded_file.name.lower().endswith(".xlsx"):
        return pd.read_excel(uploaded_file)
    return pd.read_csv(uploaded_file)


def _build_qa_chain(df):
    """Embed every row of ``df`` into FAISS and wrap it in a RetrievalQA chain."""
    documents = [
        Document(
            page_content=", ".join(f"{col}: {row[col]}" for col in df.columns),
            metadata={"index": index},
        )
        for index, row in df.iterrows()
    ]
    vectorstore = FAISS.from_documents(documents, OpenAIEmbeddings())
    return RetrievalQA.from_chain_type(
        llm=ChatOpenAI(),
        chain_type="stuff",
        retriever=vectorstore.as_retriever(),
    )


def _get_qa_chain(df, file_digest):
    """Return the QA chain for this upload, building it only when the file changes.

    Streamlit re-executes the script on every widget interaction; without this
    cache each rerun would re-embed the whole dataset.
    """
    cached = st.session_state.get(_QA_CACHE_KEY)
    if cached is None or cached[0] != file_digest:
        cached = (file_digest, _build_qa_chain(df))
        st.session_state[_QA_CACHE_KEY] = cached
    return cached[1]


def _extract_plot_code(result):
    """Pull the fenced Python snippet out of ``result`` and retarget it at Plotly.

    Returns ``None`` when ``result`` is not text or contains no fenced block.
    """
    if not isinstance(result, str):
        return None
    match = _CODE_FENCE.search(result)
    if match is None:
        return None
    code = match.group(1)
    # Neutralise ``plt.show()`` BEFORE the blanket rename below; otherwise it
    # would already have become ``px.show()``, which does not exist. ``pass``
    # keeps the line a valid statement even when it is the only one in a block.
    code = code.replace("plt.show()", "pass")
    # NOTE(review): a textual plt -> px rename only works for calls whose
    # signatures happen to line up; kept because it is the app's existing approach.
    return code.replace("plt.", "px.")


def _run_plot_code(code, df):
    """Execute generated plotting code and return the ``fig`` it defines, if any."""
    # SECURITY: this executes model-generated code. It runs in its own
    # namespace so it cannot overwrite the app's variables, but that is not a
    # sandbox; do not expose this app to untrusted users without isolation.
    namespace = {"df": df, "pd": pd, "px": px}
    exec(code, namespace)
    return namespace.get("fig")


def main():
    """Render the page: key check, file upload, then the three analysis tabs."""
    # Set the title of the app
    st.title("Data Analyzer")

    # Fetch API keys from environment variables
    api_key = os.getenv("OPENAI_API_KEY")
    pandasai_api_key = os.getenv("PANDASAI_API_KEY")

    if not api_key or not pandasai_api_key:
        st.error(
            "API keys not found in the environment. Please set the 'OPENAI_API_KEY' and 'PANDASAI_API_KEY' environment variables."
        )
        return

    # File uploader
    uploaded_file = st.file_uploader("Upload an Excel or CSV file", type=["xlsx", "csv"])
    if uploaded_file is None:
        st.info("Please upload a file to begin analysis.")
        return

    df = _load_dataframe(uploaded_file)

    st.write("Data Preview:")
    st.write(df.head())

    # Set up PandasAI Agent
    agent = Agent(df)

    # Content digest identifies the upload for the QA-chain cache.
    file_digest = hashlib.sha256(uploaded_file.getvalue()).hexdigest()

    # Create tabs
    tab1, tab2, tab3 = st.tabs(["PandasAI Analysis", "RAG Q&A", "Data Visualization"])

    with tab1:
        st.header("Data Analysis using PandasAI")
        pandas_question = st.text_input("Ask a question about the data (PandasAI):")
        if pandas_question:
            result = agent.chat(pandas_question)
            st.write("PandasAI Answer:", result)

    with tab2:
        st.header("Question Answering using RAG")
        rag_question = st.text_input("Ask a question about the data (RAG):")
        if rag_question:
            if df.empty:
                # An index cannot be built from zero documents.
                st.warning("The uploaded file contains no rows to search.")
            else:
                # Built lazily so the other tabs never trigger embedding calls.
                qa_chain = _get_qa_chain(df, file_digest)
                result = qa_chain.run(rag_question)
                st.write("RAG Answer:", result)

    with tab3:
        st.header("Data Visualization")
        viz_question = st.text_input("What kind of graph would you like to create? (e.g., 'Show a scatter plot of salary vs experience')")

        if viz_question:
            # Broad catch is deliberate: this is the UI boundary around
            # model-generated code, which can fail in arbitrary ways.
            try:
                result = agent.chat(viz_question)
                viz_code = _extract_plot_code(result)
                fig = _run_plot_code(viz_code, df) if viz_code else None

                if fig is not None:
                    st.plotly_chart(fig)
                else:
                    st.write("Unable to generate a graph. Please try a different query.")
            except Exception as e:
                st.write(f"An error occurred: {str(e)}")
                st.write("Please try phrasing your query differently.")


main()