Spaces:

CosmickVisions
/

Data-Vision

Running

App Files Community

CosmickVisions committed on Mar 24

Commit

fef09cf

verified ·

1 Parent(s): 925e1b1

Update app.py

Browse files

Files changed (1) hide show

app.py +73 -117

app.py CHANGED Viewed

@@ -234,7 +234,6 @@ def update_vector_store_with_plot(plot_text, existing_vector_store):
     return existing_vector_store
 def extract_plot_data(plot_info, df):
-    # Updated to handle Plotly.js JSON
     plot_type = plot_info["type"]
     x_col = plot_info["x"]
     y_col = plot_info["y"] if "y" in plot_info else None
@@ -272,16 +271,6 @@ def extract_plot_data(plot_info, df):
                     plot_text += f"{col1} vs {col2}: {corr.loc[col2, col1]:.2f}\n"
     return plot_text
-def generate_3d_scatter_plot(params):
-    df = st.session_state.cleaned_data
-    match = re.search(r"([\w\s]+)\s+vs\s+([\w\s]+)\s+vs\s+([\w\s]+)", params)
-    if match and len(match.groups()) >= 3:
-        x_axis, y_axis, z_axis = match.group(1).strip(), match.group(2).strip(), match.group(3).strip()
-        if x_axis in df.columns and y_axis in df.columns and z_axis in df.columns:
-            fig = px.scatter_3d(df, x=x_axis, y=y_axis, z=z_axis, title=f'3D Scatter Plot of {x_axis} vs {y_axis} vs {z_axis}')
-            return fig.to_json()
-    return None
 def get_chatbot_response(user_input, app_mode, vector_store=None, model="llama3-70b-8192"):
     system_prompt = (
         "You are an AI assistant in Data-Vision Pro, a data analysis app with RAG capabilities. "
@@ -379,34 +368,6 @@ def display_dataset_preview():
         st.dataframe(st.session_state.cleaned_data.head(10), use_container_width=True)
         st.markdown("---")
-def suggest_data_cleaning(df):
-    suggestions = []
-    if df.isna().sum().sum() > 0:
-        for col in df.columns:
-            na_count = df[col].isna().sum()
-            if na_count > 0:
-                if na_count / df.shape[0] > 0.5:
-                    suggestions.append(f"- Drop column '{col}' (>{50}% missing values)")
-                else:
-                    suggestions.append(f"- Impute missing values in column '{col}' ({na_count} missing values)")
-    return "\n".join(suggestions) if suggestions else "No automatic cleaning suggestions."
-def parse_command(command):
-    # ... (Previous command parser) ...
-    elif "show a 3d scatter plot" in command or "3d scatter plot of" in command:
-        params = command.replace("show a 3d scatter plot of", "").replace("3d scatter plot of", "").strip()
-        return generate_3d_scatter_plot, params
-    # ... (rest of the function is same)
-def parse_multistep_command(command):
-    steps = command.split(';')
-    parsed_steps = []
-    for step in steps:
-        func, param = parse_command(step.strip())
-        if func:
-            parsed_steps.append((func, param))
-    return parsed_steps
 # Main App
 def main():
     # Header
@@ -601,89 +562,84 @@ def main():
                 new_df[scale_cols] = scaler.fit_transform(new_df[scale_cols])
                 update_cleaned_data(new_df)
-      elif app_mode == "EDA":
-        st.header("🔍 Exploratory Data Analysis (EDA)")
         if 'cleaned_data' not in st.session_state:
             st.warning("Please upload and clean data first.")
             st.stop()
         df = st.session_state.cleaned_data.copy()
-        st.markdown("### Dataset Overview")
-        col1, col2, col3 = st.columns(3)
-        col1.metric("Rows", df.shape[0])
-        col2.metric("Columns", df.shape[1])
-        col3.metric("Missing Values", df.isna().sum().sum())
-        # Interactive Visualization Builder with Plotly.js
-        st.markdown("### Interactive Visualization Builder")
-        plot_type = st.selectbox("Choose visualization type", [
-            "Scatter Plot", "Histogram", "Box Plot", "Line Chart", "Bar Chart", "Correlation Matrix", "3D Scatter Plot"
-        ])
-        x_axis = st.selectbox("X-axis", df.columns) if plot_type != "Correlation Matrix" else None
-        y_axis = st.selectbox("Y-axis", df.columns) if plot_type in ["Scatter Plot", "Box Plot", "Line Chart", "3D Scatter Plot"] else None
-        z_axis = st.selectbox("Z-axis", df.columns) if plot_type == "3D Scatter Plot" else None
-        generate_plot = st.button("Generate Plot")
-        if generate_plot:
-            fig_json = None
-            try:
-                if plot_type == "Scatter Plot":
-                    fig = px.scatter(df, x=x_axis, y=y_axis, title=f'Scatter Plot of {x_axis} vs {y_axis}')
-                    fig_json = fig.to_json()
-                elif plot_type == "Histogram":
-                    fig = px.histogram(df, x=x_axis, title=f'Histogram of {x_axis}')
-                    fig_json = fig.to_json()
-                elif plot_type == "Box Plot":
-                    fig = px.box(df, x=x_axis, y=y_axis, title=f'Box Plot of {x_axis} vs {y_axis}')
-                    fig_json = fig.to_json()
-                elif plot_type == "Line Chart":
-                    fig = px.line(df, x=x_axis, y=y_axis, title=f'Line Chart of {x_axis} vs {y_axis}')
-                    fig_json = fig.to_json()
-                elif plot_type == "Bar Chart":
-                    fig = px.bar(df, x=x_axis, title=f'Bar Chart of {x_axis}')
-                    fig_json = fig.to_json()
-                elif plot_type == "Correlation Matrix":
-                    numeric_df = df.select_dtypes(include=np.number)
-                    if len(numeric_df.columns) > 1:
-                        corr = numeric_df.corr()
-                        fig = px.imshow(corr, text_auto=True, color_continuous_scale='RdBu_r', zmin=-1, zmax=1, title='Correlation Matrix')
-                        fig_json = fig.to_json()
-                elif plot_type == "3D Scatter Plot":
-                    fig_json = generate_3d_scatter_plot(f"{x_axis} vs {y_axis} vs {z_axis}")
-                if fig_json:
-                    # Render Plotly.js Chart
-                    st.components.v1.html(f"""
-                        <div id="plotly-chart"></div>
-                        <script>
-                            Plotly.newPlot('plotly-chart', {fig_json});
-                        </script>
-                    """, height=600)
-                    # Store Plotly JSON in session state
-                    st.session_state.last_plot = {
-                        "type": plot_type,
-                        "x": x_axis,
-                        "y": y_axis,
-                        "z": z_axis if plot_type == "3D Scatter Plot" else None,
-                        "data": fig_json
-                    }
-                    # Extract and display plot data
-                    plot_text = extract_plot_data(st.session_state.last_plot, df)
-                    st.session_state.vector_store = update_vector_store_with_plot(plot_text, st.session_state.vector_store)
-                    with st.expander("Extracted Plot Data"):
-                        st.text(plot_text)
-            except Exception as e:
-                st.error(f"Couldn't generate plot: {str(e)}")
     # Chatbot Section
     st.markdown("---")

     return existing_vector_store
 def extract_plot_data(plot_info, df):
     plot_type = plot_info["type"]
     x_col = plot_info["x"]
     y_col = plot_info["y"] if "y" in plot_info else None
                     plot_text += f"{col1} vs {col2}: {corr.loc[col2, col1]:.2f}\n"
     return plot_text
 def get_chatbot_response(user_input, app_mode, vector_store=None, model="llama3-70b-8192"):
     system_prompt = (
         "You are an AI assistant in Data-Vision Pro, a data analysis app with RAG capabilities. "
         st.dataframe(st.session_state.cleaned_data.head(10), use_container_width=True)
         st.markdown("---")
 # Main App
 def main():
     # Header
                 new_df[scale_cols] = scaler.fit_transform(new_df[scale_cols])
                 update_cleaned_data(new_df)
+    elif app_mode == "EDA":
+        st.header("🔍 Interactive Data Explorer")
         if 'cleaned_data' not in st.session_state:
             st.warning("Please upload and clean data first.")
             st.stop()
         df = st.session_state.cleaned_data.copy()
+        enhance_section_title("Dataset Overview")
+        with st.container():
+            col1, col2, col3, col4 = st.columns(4)
+            col1.metric("Total Rows", df.shape[0])
+            col2.metric("Total Columns", df.shape[1])
+            missing_percentage = df.isna().sum().sum() / df.size * 100
+            col3.metric("Missing Values", f"{df.isna().sum().sum()} ({missing_percentage:.1f}%)")
+            col4.metric("Duplicates", df.duplicated().sum())
+        tab1, tab2, tab3 = st.tabs(["Quick Preview", "Column Types", "Missing Matrix"])
+        with tab1:
+            st.write("First few rows of the dataset:")
+            st.dataframe(df.head(), use_container_width=True)
+        with tab2:
+            st.write("Column Data Types:")
+            type_counts = df.dtypes.value_counts().reset_index()
+            type_counts.columns = ['Type', 'Count']
+            st.dataframe(type_counts, use_container_width=True)
+        with tab3:
+            st.write("Missing Values Matrix:")
+            fig_missing = px.imshow(df.isna(), color_continuous_scale=['#e0e0e0', '#66c2a5'])
+            fig_missing.update_layout(coloraxis_colorscale=[[0, 'lightgrey'], [1, '#FF4B4B']])
+            st.plotly_chart(fig_missing, use_container_width=True)
+        enhance_section_title("Interactive Visualization Builder")
+        with st.container():
+            col1, col2 = st.columns([1, 3])
+            with col1:
+                plot_type = st.selectbox("Choose visualization type", [
+                    "Scatter Plot", "Histogram", "Box Plot", "Line Chart", "Bar Chart", "Correlation Matrix"
+                ])
+                x_axis = st.selectbox("X-axis", df.columns) if plot_type != "Correlation Matrix" else None
+                y_axis = st.selectbox("Y-axis", df.columns) if plot_type in ["Scatter Plot", "Box Plot", "Line Chart"] else None
+                color_by = st.selectbox("Color encoding", ["None"] + df.columns.tolist(), format_func=lambda x: "No color" if x == "None" else x) if plot_type != "Correlation Matrix" else None
+            with col2:
+                try:
+                    fig = None
+                    if plot_type == "Scatter Plot" and x_axis and y_axis:
+                        fig = px.scatter(df, x=x_axis, y=y_axis, color=color_by if color_by != "None" else None, title=f'Scatter Plot of {x_axis} vs {y_axis}')
+                    elif plot_type == "Histogram" and x_axis:
+                        fig = px.histogram(df, x=x_axis, color=color_by if color_by != "None" else None, nbins=30, title=f'Histogram of {x_axis}')
+                    elif plot_type == "Box Plot" and x_axis and y_axis:
+                        fig = px.box(df, x=x_axis, y=y_axis, color=color_by if color_by != "None" else None, title=f'Box Plot of {x_axis} vs {y_axis}')
+                    elif plot_type == "Line Chart" and x_axis and y_axis:
+                        fig = px.line(df, x=x_axis, y=y_axis, color=color_by if color_by != "None" else None, title=f'Line Chart of {x_axis} vs {y_axis}')
+                    elif plot_type == "Bar Chart" and x_axis:
+                        fig = px.bar(df, x=x_axis, color=color_by if color_by != "None" else None, title=f'Bar Chart of {x_axis}')
+                    elif plot_type == "Correlation Matrix":
+                        numeric_df = df.select_dtypes(include=np.number)
+                        if len(numeric_df.columns) > 1:
+                            corr = numeric_df.corr()
+                            fig = px.imshow(corr, text_auto=True, color_continuous_scale='RdBu_r', zmin=-1, zmax=1, title='Correlation Matrix')
+                    if fig:
+                        fig.update_layout(template="plotly_white")
+                        st.plotly_chart(fig, use_container_width=True)
+                        st.session_state.last_plot = {
+                            "type": plot_type,
+                            "x": x_axis,
+                            "y": y_axis,
+                            "data": df[[x_axis, y_axis]].to_json() if y_axis else df[[x_axis]].to_json()
+                        }
+                        plot_text = extract_plot_data(st.session_state.last_plot, df)
+                        st.session_state.vector_store = update_vector_store_with_plot(plot_text, st.session_state.vector_store)
+                        with st.expander("Extracted Plot Data"):
+                            st.text(plot_text)
+                    else:
+                        st.error("Please provide required inputs for the selected plot type.")
+                except Exception as e:
+                    st.error(f"Couldn't create visualization: {str(e)}")
     # Chatbot Section
     st.markdown("---")