Update app.py

app.py CHANGED
@@ -156,24 +156,86 @@ def create_vector_store(df_text):
     os.unlink(temp_path)
     return vector_store
 
+def update_vector_store_with_plot(plot_text, existing_vector_store):
+    """Update the FAISS vector store with plot-derived text"""
+    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as temp_file:
+        temp_file.write(plot_text)
+        temp_path = temp_file.name
+
+    loader = TextLoader(temp_path)
+    documents = loader.load()
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
+    texts = text_splitter.split_documents(documents)
+
+    if existing_vector_store:
+        existing_vector_store.add_documents(texts)
+    else:
+        existing_vector_store = FAISS.from_documents(texts, embeddings)
+
+    os.unlink(temp_path)
+    return existing_vector_store
+
+def extract_plot_data(plot_info, df):
+    """Extract numerical data from the last generated plot and convert to text"""
+    plot_type = plot_info["type"]
+    x_col = plot_info["x"]
+    y_col = plot_info["y"] if "y" in plot_info else None
+    data = pd.read_json(plot_info["data"])
+
+    plot_text = f"Plot Type: {plot_type}\n"
+    plot_text += f"X-Axis: {x_col}\n"
+    if y_col:
+        plot_text += f"Y-Axis: {y_col}\n"
+
+    if plot_type == "Scatter Plot" and y_col:
+        correlation = data[x_col].corr(data[y_col])
+        slope, intercept, r_value, p_value, std_err = stats.linregress(data[x_col].dropna(), data[y_col].dropna())
+        plot_text += f"Correlation: {correlation:.2f}\n"
+        plot_text += f"Linear Regression: Slope={slope:.2f}, Intercept={intercept:.2f}, R²={r_value**2:.2f}, p-value={p_value:.4f}\n"
+        plot_text += f"X Stats: Mean={data[x_col].mean():.2f}, Std={data[x_col].std():.2f}, Min={data[x_col].min():.2f}, Max={data[x_col].max():.2f}\n"
+        plot_text += f"Y Stats: Mean={data[y_col].mean():.2f}, Std={data[y_col].std():.2f}, Min={data[y_col].min():.2f}, Max={data[y_col].max():.2f}\n"
+    elif plot_type == "Histogram":
+        plot_text += f"Stats: Mean={data[x_col].mean():.2f}, Median={data[x_col].median():.2f}, Std={data[x_col].std():.2f}\n"
+        plot_text += f"Skewness: {data[x_col].skew():.2f}\n"
+        plot_text += f"Range: [{data[x_col].min():.2f}, {data[x_col].max():.2f}]\n"
+    elif plot_type == "Box Plot" and y_col:
+        q1, q3 = data[y_col].quantile(0.25), data[y_col].quantile(0.75)
+        iqr = q3 - q1
+        plot_text += f"Y Stats: Median={data[y_col].median():.2f}, Q1={q1:.2f}, Q3={q3:.2f}, IQR={iqr:.2f}\n"
+        plot_text += f"Outliers: {len(data[y_col][(data[y_col] < q1 - 1.5 * iqr) | (data[y_col] > q3 + 1.5 * iqr)])} potential outliers\n"
+    elif plot_type == "Line Chart" and y_col:
+        plot_text += f"Y Stats: Mean={data[y_col].mean():.2f}, Std={data[y_col].std():.2f}, Trend={'increasing' if data[y_col].iloc[-1] > data[y_col].iloc[0] else 'decreasing'}\n"
+    elif plot_type == "Bar Chart":
+        plot_text += f"Counts: {data[x_col].value_counts().to_dict()}\n"
+    elif plot_type == "Correlation Matrix":
+        corr = data.corr()
+        plot_text += "Correlation Matrix:\n"
+        for col1 in corr.columns:
+            for col2 in corr.index:
+                if col1 < col2:  # Avoid duplicates
+                    plot_text += f"{col1} vs {col2}: {corr.loc[col2, col1]:.2f}\n"
+
+    return plot_text
+
 def get_chatbot_response(user_input, app_mode, vector_store=None, model="llama3-70b-8192"):
-    """Get response from Groq with vector store context"""
+    """Get response from Groq with vector store context including plot data"""
     system_prompt = (
         "You are an AI assistant in Data-Vision Pro, a data analysis app with RAG capabilities. "
         f"The user is on the '{app_mode}' page:\n"
         "- **Data Upload**: Upload CSV/XLSX files, view stats, or generate reports.\n"
         "- **Data Cleaning**: Clean data (e.g., handle missing values, encode variables).\n"
-        "- **EDA**: Visualize data (e.g., scatter plots, histograms).\n"
+        "- **EDA**: Visualize data (e.g., scatter plots, histograms) and analyze plots.\n"
+        "When analyzing plots, provide detailed insights based on numerical data extracted from them."
     )
 
     context = ""
     if vector_store:
         docs = vector_store.similarity_search(user_input, k=3)
         if docs:
-            context = "\n\nDataset Context:\n" + "\n".join([f"- {doc.page_content}" for doc in docs])
-            system_prompt += f"Use this dataset context to augment your response:\n{context}"
+            context = "\n\nDataset and Plot Context:\n" + "\n".join([f"- {doc.page_content}" for doc in docs])
+            system_prompt += f"Use this dataset and plot context to augment your response:\n{context}"
         else:
-            system_prompt += "No dataset is loaded. Assist based on app functionality."
+            system_prompt += "No dataset or plot data is loaded. Assist based on app functionality."
 
     try:
         response = client.chat.completions.create(

@@ -230,20 +292,8 @@ def analyze_plot():
         return "No plot available to analyze."
     plot_info = st.session_state.last_plot
     df = pd.read_json(plot_info["data"])
-    plot_type = plot_info["type"]
-    x_col = plot_info["x"]
-    y_col = plot_info["y"] if "y" in plot_info else None
-
-    if plot_type == "Scatter Plot" and y_col:
-        correlation = df[x_col].corr(df[y_col])
-        strength = "strong" if abs(correlation) > 0.7 else "moderate" if abs(correlation) > 0.3 else "weak"
-        direction = "positive" if correlation > 0 else "negative"
-        return f"The scatter plot of {x_col} vs {y_col} shows a {strength} {direction} correlation (Pearson r = {correlation:.2f})."
-    elif plot_type == "Histogram":
-        skewness = df[x_col].skew()
-        skew_desc = "positively skewed" if skewness > 1 else "negatively skewed" if skewness < -1 else "approximately symmetric"
-        return f"The histogram of {x_col} is {skew_desc} (skewness = {skewness:.2f})."
-    return "Inference not available for this plot type."
+    plot_text = extract_plot_data(plot_info, df)
+    return f"Analysis of the last plot:\n{plot_text}"
 
 def parse_command(command):
     command = command.lower().strip()

@@ -529,6 +579,11 @@ def main():
                 "y": y_axis,
                 "data": df[[x_axis, y_axis]].to_json() if y_axis else df[[x_axis]].to_json()
             }
+            # Extract numerical data and update vector store
+            plot_text = extract_plot_data(st.session_state.last_plot, df)
+            st.session_state.vector_store = update_vector_store_with_plot(plot_text, st.session_state.vector_store)
+            with st.expander("Extracted Plot Data"):
+                st.text(plot_text)
         else:
             st.error("Please provide required inputs for the selected plot type.")
     except Exception as e:
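For reference, a minimal usage sketch of the two new helpers outside Streamlit. It assumes app.py's existing module-level imports and objects (pandas as pd, scipy's stats, tempfile, TextLoader, RecursiveCharacterTextSplitter, FAISS, and the embeddings instance) are in scope; demo_df and the plot_info dict below are illustrative stand-ins, not part of this commit.

    # Illustrative sketch only -- demo_df and plot_info are made-up stand-ins for
    # the uploaded dataset and st.session_state.last_plot; extract_plot_data,
    # update_vector_store_with_plot, and embeddings come from app.py.
    import pandas as pd

    demo_df = pd.DataFrame({"height": [150, 160, 170, 180],
                            "weight": [51, 59, 72, 79]})

    # Mirrors the dict that main() stores in st.session_state.last_plot.
    plot_info = {
        "type": "Scatter Plot",
        "x": "height",
        "y": "weight",
        "data": demo_df[["height", "weight"]].to_json(),
    }

    plot_text = extract_plot_data(plot_info, demo_df)        # numeric summary as plain text
    store = update_vector_store_with_plot(plot_text, None)   # None -> builds a fresh FAISS store
    hits = store.similarity_search("correlation between height and weight", k=1)
    print(hits[0].page_content)

Passing None exercises the branch that creates a new FAISS index; in the app, the current st.session_state.vector_store is passed instead so plot summaries are appended to the dataset's existing store.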