Spaces:

EditsPaarth
/

AI-Data-Analysis

Sleeping

App Files Files Community

EditsPaarth committed on Nov 18, 2024

Commit

e83d0d8

verified ·

1 Parent(s): 64f4108

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -38

app.py CHANGED Viewed

@@ -3,9 +3,11 @@ import pandas as pd
 import numpy as np
 import seaborn as sns
 import matplotlib.pyplot as plt
 from groq import Groq
-# Groq API Key setup
 GROQ_API_KEY = "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"
 client = Groq(api_key=GROQ_API_KEY)
@@ -17,6 +19,7 @@ def chat_with_groq(prompt):
             model="llama3-8b-8192",
             stream=False
         )
         return chat_completion.choices[0].message.content
     except Exception as e:
         return f"Error fetching response: {e}"
@@ -24,10 +27,12 @@ def chat_with_groq(prompt):
 def generate_code_with_groq(prompt):
     try:
         chat_completion = client.chat.completions.create(
-            messages=[{"role": "user", "content": prompt}, {"role": "assistant", "content": "```python"}],
             model="gemma-7b-it",
             stream=False,
-            stop="```"
         )
         return chat_completion.choices[0].message.content
     except Exception as e:
@@ -47,7 +52,7 @@ def parse_file(uploaded_file):
 # Preprocess DataFrame to Fix Type Issues
 def preprocess_dataframe(df):
     try:
-        # Convert problematic columns to string to avoid serialization issues
         for col in df.columns:
             if df[col].dtype.name == 'object' or df[col].dtype.name == 'category':
                 df[col] = df[col].astype(str)
@@ -82,6 +87,9 @@ def analyze_data(data, visualization_type, class_size=10):
         fig, ax = plt.subplots(figsize=(8, 6))
         sns.heatmap(numeric_data.corr(), annot=True, ax=ax, cmap="coolwarm", fmt=".2f")
         st.pyplot(fig)
     elif visualization_type == "Line Graph" and not numeric_data.empty:
         st.subheader("Line Graph")
         x_col = st.selectbox("Select the X-axis column for the Line Graph (Non-Numeric):", numeric_data.columns)
@@ -92,6 +100,10 @@ def analyze_data(data, visualization_type, class_size=10):
         ax.set_xlabel(x_col)
         ax.set_ylabel(y_col)
         st.pyplot(fig)
     elif visualization_type == "Area Chart" and not numeric_data.empty:
         st.subheader("Area Chart")
         column = st.selectbox("Select a column for the Area Chart:", numeric_data.columns)
@@ -100,6 +112,7 @@ def analyze_data(data, visualization_type, class_size=10):
         ax.set_xlabel(column)
         ax.set_ylabel("Area")
         st.pyplot(fig)
     else:
         st.warning("No valid visualization option selected or data available.")
@@ -107,39 +120,21 @@ def analyze_data(data, visualization_type, class_size=10):
     prompt = generate_groq_prompt(data, visualization_type, class_size)
     return prompt
-# Generate Groq Prompt with Indented Table
 def generate_groq_prompt(data, visualization_type, class_size):
-    # Limit the preview to the first 10 rows for readability
-    data_snippet = data.head(10)  # Select top 10 rows for the preview
-    # Compute column widths for alignment
-    column_widths = [max(len(str(val)) for val in [col] + data_snippet[col].tolist()) for col in data_snippet.columns]
-    # Create the table header with spacing
-    table_header = " ".join(f"{col:<{column_widths[i]}}" for i, col in enumerate(data_snippet.columns))
-    # Create the rows with spacing
-    table_rows = "\n".join(
-        "    " + " ".join(f"{str(val):<{column_widths[i]}}" for i, val in enumerate(row))  # Add indentation
-        for row in data_snippet.values
-    )
-    # Combine header and rows into a single table
-    formatted_table = f"    {table_header}\n{table_rows}"
-    # Create the textual prompt
     prompt = f"""
-    The user has uploaded a dataset and selected the '{visualization_type}' visualization type with a class size of {class_size}.
-    Below is a preview of the dataset:
-    {formatted_table}
-    Please generate Python code for the analysis and visualization specified.
     """
     return prompt
 # Streamlit App
 st.title("Data Analysis AI")
 st.markdown("Upload a file (CSV or Excel) to analyze it.")
@@ -152,7 +147,7 @@ if uploaded_file is not None:
         if data is not None:
             data = preprocess_dataframe(data)  # Fix serialization issues
             st.subheader("Uploaded Data")
-            st.dataframe(data)  # Show raw data in tabular form
             # Visualization Selection
             visualization_type = st.selectbox(
@@ -163,14 +158,18 @@ if uploaded_file is not None:
             # User input for class size customization
             class_size = st.slider("Select the class size for certain plots (e.g., Histogram)", 5, 50, 10)
-            # Perform Analysis and Generate Prompt for Groq
             prompt = analyze_data(data, visualization_type, class_size)
-            # Send the prompt directly to Groq
-            st.subheader("Groq Processing")
-            response = chat_with_groq(prompt)
-            st.write("Groq's Response:")
-            st.write(response)
             # Groq Code Generation Section
             st.subheader("Generate Python Code with Groq")

 import numpy as np
 import seaborn as sns
 import matplotlib.pyplot as plt
+import tempfile
+import subprocess
 from groq import Groq
+# Groq API Key setup .
 GROQ_API_KEY = "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"
 client = Groq(api_key=GROQ_API_KEY)
             model="llama3-8b-8192",
             stream=False
         )
+        print(prompt)
         return chat_completion.choices[0].message.content
     except Exception as e:
         return f"Error fetching response: {e}"
 def generate_code_with_groq(prompt):
     try:
         chat_completion = client.chat.completions.create(
+            messages=[{"role": "user", "content": prompt}, {"role": "assistant", "content": "
+python"}],
             model="gemma-7b-it",
             stream=False,
+            stop="
+"
         )
         return chat_completion.choices[0].message.content
     except Exception as e:
 # Preprocess DataFrame to Fix Type Issues
 def preprocess_dataframe(df):
     try:
+        # Convert problematic columns to string to avoid Arrow serialization issues
         for col in df.columns:
             if df[col].dtype.name == 'object' or df[col].dtype.name == 'category':
                 df[col] = df[col].astype(str)
         fig, ax = plt.subplots(figsize=(8, 6))
         sns.heatmap(numeric_data.corr(), annot=True, ax=ax, cmap="coolwarm", fmt=".2f")
         st.pyplot(fig)
     elif visualization_type == "Line Graph" and not numeric_data.empty:
         st.subheader("Line Graph")
         x_col = st.selectbox("Select the X-axis column for the Line Graph (Non-Numeric):", numeric_data.columns)
         ax.set_xlabel(x_col)
         ax.set_ylabel(y_col)
         st.pyplot(fig)
     elif visualization_type == "Area Chart" and not numeric_data.empty:
         st.subheader("Area Chart")
         column = st.selectbox("Select a column for the Area Chart:", numeric_data.columns)
         ax.set_xlabel(column)
         ax.set_ylabel("Area")
         st.pyplot(fig)
     else:
         st.warning("No valid visualization option selected or data available.")
     prompt = generate_groq_prompt(data, visualization_type, class_size)
     return prompt
+# Function to generate a prompt based on the data analysis
 def generate_groq_prompt(data, visualization_type, class_size):
+    # Convert DataFrame to a string without the index
+    data_without_index = data.to_string(index=False)
     prompt = f"""
+    Here is the summary statistics for the dataset:
+    {data_without_index}
+    The user has selected the '{visualization_type}' visualization type with a class size of {class_size}.
+    Please generate Python code that does this and for any data, please don't use any file input. Write the data in the code.
     """
     return prompt
 # Streamlit App
 st.title("Data Analysis AI")
 st.markdown("Upload a file (CSV or Excel) to analyze it.")
         if data is not None:
             data = preprocess_dataframe(data)  # Fix serialization issues
             st.subheader("Uploaded Data")
+            st.write(data.head())
             # Visualization Selection
             visualization_type = st.selectbox(
             # User input for class size customization
             class_size = st.slider("Select the class size for certain plots (e.g., Histogram)", 5, 50, 10)
+            # Perform Analysis and Visualization
             prompt = analyze_data(data, visualization_type, class_size)
+            st.text(f"Prompt sent to Groq:\n{prompt}")
+            # Chat with Groq Section
+            st.subheader("Chat with Groq")
+            chat_input = st.text_area("Ask Groq questions about the data:")
+            if st.button("Chat"):
+                if chat_input:
+                    chat_response = chat_with_groq(f"Here is the data:\n{data}\n\n{chat_input}")
+                    st.write("Groq's Response:")
+                    st.write(chat_response)
             # Groq Code Generation Section
             st.subheader("Generate Python Code with Groq")