Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -69,29 +69,29 @@ def analyze_data(data, visualization_type, class_size=10):
|
|
69 |
# Combine numerical and non-numerical summaries
|
70 |
numeric_data = data.select_dtypes(include=[np.number])
|
71 |
|
72 |
-
|
73 |
-
# Visualization logic
|
74 |
-
if visualization_type == "Heatmap" and not numeric_data.empty:
|
75 |
-
st.subheader("Correlation Heatmap")
|
76 |
-
fig, ax = plt.subplots(figsize=(8, 6))
|
77 |
-
sns.heatmap(numeric_data.corr(), annot=True, ax=ax, cmap="coolwarm", fmt=".2f")
|
78 |
-
st.pyplot(fig)
|
79 |
-
|
80 |
-
elif visualization_type == "Bar Chart" and not numeric_data.empty:
|
81 |
st.subheader("Bar Chart")
|
82 |
-
x_col = st.selectbox("Select the X-axis column for the Bar Chart:", data.columns)
|
83 |
-
y_col = st.selectbox("Select the Y-axis column for the Bar Chart:", data.columns)
|
84 |
|
85 |
fig, ax = plt.subplots(figsize=(8, 6))
|
86 |
data.groupby(x_col)[y_col].sum().plot(kind='bar', ax=ax)
|
87 |
ax.set_xlabel(x_col)
|
88 |
ax.set_ylabel(y_col)
|
89 |
st.pyplot(fig)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
90 |
|
91 |
elif visualization_type == "Line Graph" and not numeric_data.empty:
|
92 |
st.subheader("Line Graph")
|
93 |
-
x_col = st.selectbox("Select the X-axis column for the Line Graph:", numeric_data.columns)
|
94 |
-
y_col = st.selectbox("Select the Y-axis column for the Line Graph:", numeric_data.columns)
|
95 |
|
96 |
fig, ax = plt.subplots(figsize=(8, 6))
|
97 |
ax.plot(data[x_col], data[y_col])
|
@@ -99,25 +99,8 @@ def analyze_data(data, visualization_type, class_size=10):
|
|
99 |
ax.set_ylabel(y_col)
|
100 |
st.pyplot(fig)
|
101 |
|
102 |
-
elif visualization_type == "Scatter Plot" and not numeric_data.empty:
|
103 |
-
st.subheader("Scatter Plot")
|
104 |
-
x_col = st.selectbox("Select the X-axis column for the Scatter Plot:", numeric_data.columns)
|
105 |
-
y_col = st.selectbox("Select the Y-axis column for the Scatter Plot:", numeric_data.columns)
|
106 |
|
107 |
-
fig, ax = plt.subplots(figsize=(8, 6))
|
108 |
-
ax.scatter(data[x_col], data[y_col])
|
109 |
-
ax.set_xlabel(x_col)
|
110 |
-
ax.set_ylabel(y_col)
|
111 |
-
st.pyplot(fig)
|
112 |
|
113 |
-
elif visualization_type == "Histogram" and not numeric_data.empty:
|
114 |
-
st.subheader("Histogram")
|
115 |
-
column = st.selectbox("Select a column for the Histogram:", numeric_data.columns)
|
116 |
-
fig, ax = plt.subplots(figsize=(8, 6))
|
117 |
-
data[column].plot(kind='hist', bins=class_size, ax=ax)
|
118 |
-
ax.set_xlabel(column)
|
119 |
-
ax.set_ylabel("Frequency")
|
120 |
-
st.pyplot(fig)
|
121 |
|
122 |
elif visualization_type == "Area Chart" and not numeric_data.empty:
|
123 |
st.subheader("Area Chart")
|
@@ -137,11 +120,12 @@ def analyze_data(data, visualization_type, class_size=10):
|
|
137 |
|
138 |
# Function to generate a prompt based on the data analysis
|
139 |
def generate_groq_prompt(data, visualization_type, class_size):
|
140 |
-
# Convert DataFrame to a string without the index
|
141 |
-
|
142 |
-
|
|
|
143 |
prompt = f"""
|
144 |
-
Here is the summary statistics for the dataset:
|
145 |
{data_without_index}
|
146 |
|
147 |
The user has selected the '{visualization_type}' visualization type with a class size of {class_size}.
|
@@ -162,7 +146,7 @@ if uploaded_file is not None:
|
|
162 |
if data is not None:
|
163 |
data = preprocess_dataframe(data) # Fix serialization issues
|
164 |
st.subheader("Uploaded Data")
|
165 |
-
st.
|
166 |
|
167 |
# Visualization Selection
|
168 |
visualization_type = st.selectbox(
|
@@ -175,7 +159,10 @@ if uploaded_file is not None:
|
|
175 |
|
176 |
# Perform Analysis and Visualization
|
177 |
prompt = analyze_data(data, visualization_type, class_size)
|
178 |
-
|
|
|
|
|
|
|
179 |
|
180 |
# Chat with Groq Section
|
181 |
st.subheader("Chat with Groq")
|
|
|
69 |
# Combine numerical and non-numerical summaries
|
70 |
numeric_data = data.select_dtypes(include=[np.number])
|
71 |
|
72 |
+
if visualization_type == "Bar Chart" and not numeric_data.empty:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
st.subheader("Bar Chart")
|
74 |
+
x_col = st.selectbox("Select the X-axis column for the Bar Chart (Non-Numeric):", data.columns)
|
75 |
+
y_col = st.selectbox("Select the Y-axis column for the Bar Chart (Numeric):", data.columns)
|
76 |
|
77 |
fig, ax = plt.subplots(figsize=(8, 6))
|
78 |
data.groupby(x_col)[y_col].sum().plot(kind='bar', ax=ax)
|
79 |
ax.set_xlabel(x_col)
|
80 |
ax.set_ylabel(y_col)
|
81 |
st.pyplot(fig)
|
82 |
+
# Visualization logic
|
83 |
+
elif visualization_type == "Heatmap" and not numeric_data.empty:
|
84 |
+
st.subheader("Correlation Heatmap")
|
85 |
+
fig, ax = plt.subplots(figsize=(8, 6))
|
86 |
+
sns.heatmap(numeric_data.corr(), annot=True, ax=ax, cmap="coolwarm", fmt=".2f")
|
87 |
+
st.pyplot(fig)
|
88 |
+
|
89 |
+
|
90 |
|
91 |
elif visualization_type == "Line Graph" and not numeric_data.empty:
|
92 |
st.subheader("Line Graph")
|
93 |
+
x_col = st.selectbox("Select the X-axis column for the Line Graph (Non-Numeric):", numeric_data.columns)
|
94 |
+
y_col = st.selectbox("Select the Y-axis column for the Line Graph (Numeric):", numeric_data.columns)
|
95 |
|
96 |
fig, ax = plt.subplots(figsize=(8, 6))
|
97 |
ax.plot(data[x_col], data[y_col])
|
|
|
99 |
ax.set_ylabel(y_col)
|
100 |
st.pyplot(fig)
|
101 |
|
|
|
|
|
|
|
|
|
102 |
|
|
|
|
|
|
|
|
|
|
|
103 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
104 |
|
105 |
elif visualization_type == "Area Chart" and not numeric_data.empty:
|
106 |
st.subheader("Area Chart")
|
|
|
120 |
|
121 |
# Function to generate a prompt based on the data analysis
|
122 |
def generate_groq_prompt(data, visualization_type, class_size):
|
123 |
+
# Convert a small sample of the DataFrame to a string without the index
|
124 |
+
sample_data = data.head(10) # Include only the first 10 rows for clarity
|
125 |
+
data_without_index = sample_data.to_string(index=False)
|
126 |
+
|
127 |
prompt = f"""
|
128 |
+
Here is the summary statistics for the dataset (showing a sample of 10 rows for clarity):
|
129 |
{data_without_index}
|
130 |
|
131 |
The user has selected the '{visualization_type}' visualization type with a class size of {class_size}.
|
|
|
146 |
if data is not None:
|
147 |
data = preprocess_dataframe(data) # Fix serialization issues
|
148 |
st.subheader("Uploaded Data")
|
149 |
+
st.dataframe(data) # Show raw data in tabular form
|
150 |
|
151 |
# Visualization Selection
|
152 |
visualization_type = st.selectbox(
|
|
|
159 |
|
160 |
# Perform Analysis and Visualization
|
161 |
prompt = analyze_data(data, visualization_type, class_size)
|
162 |
+
|
163 |
+
# Display prompt sent to Groq with preserved formatting
|
164 |
+
st.subheader("Prompt Sent to Groq")
|
165 |
+
st.text_area("Preview of Prompt", prompt, height=200)
|
166 |
|
167 |
# Chat with Groq Section
|
168 |
st.subheader("Chat with Groq")
|