GenBIChatbotllama

Runtime error

App Files Files Community

Ari committed on Sep 28, 2024

Commit

5ad9e6e

verified ·

1 Parent(s): e14b81b

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -74

app.py CHANGED Viewed

@@ -3,6 +3,8 @@ import streamlit as st
 import pandas as pd
 import sqlite3
 from langchain import OpenAI, LLMChain, PromptTemplate
 import sqlparse
 import logging
@@ -11,7 +13,6 @@ if 'history' not in st.session_state:
     st.session_state.history = []
 # OpenAI API key (ensure it is securely stored)
-# You can set the API key in your environment variables or a .env file
 openai_api_key = os.getenv("OPENAI_API_KEY")
 # Check if the API key is set
@@ -19,6 +20,18 @@ if not openai_api_key:
     st.error("OpenAI API key is not set. Please set the OPENAI_API_KEY environment variable.")
     st.stop()
 # Step 1: Upload CSV data file (or use default)
 st.title("Natural Language to SQL Query App with Enhanced Insights")
 st.write("Upload a CSV file to get started, or use the default dataset.")
@@ -43,8 +56,8 @@ data.to_sql(table_name, conn, index=False, if_exists='replace')
 valid_columns = list(data.columns)
 st.write(f"Valid columns: {valid_columns}")
-# Step 3: Set up the LLM Chains
-# SQL Generation Chain
 sql_template = """
 You are an expert data scientist. Given a natural language question, the name of the table, and a list of valid columns, generate a valid SQL query that answers the question.
@@ -66,34 +79,22 @@ Valid columns: {columns}
 SQL Query:
 """
 sql_prompt = PromptTemplate(template=sql_template, input_variables=['question', 'table_name', 'columns'])
-llm = OpenAI(temperature=0, openai_api_key=openai_api_key, max_tokens = 180)
-sql_generation_chain = LLMChain(llm=llm, prompt=sql_prompt)
-# Insights Generation Chain
-insights_template =  """
-You are an expert data scientist. Based on the user's question and the SQL query result provided below, generate a concise analysis that includes key data insights and actionable recommendations. Limit the response to a maximum of 150 words.
-User's Question: {question}
-SQL Query Result:
-{result}
-Concise Analysis (max 200 words):
-"""
-insights_prompt = PromptTemplate(template=insights_template, input_variables=['question', 'result'])
-insights_chain = LLMChain(llm=llm, prompt=insights_prompt)
-# General Insights and Recommendations Chain
-general_insights_template = """
-You are an expert data scientist. Based on the entire dataset provided below, generate a concise analysis with key insights and recommendations. Limit the response to 150 words.
-Dataset Summary:
-{dataset_summary}
-Concise Analysis and Recommendations (max 150 words):
-"""
-general_insights_prompt = PromptTemplate(template=general_insights_template, input_variables=['dataset_summary'])
-general_insights_chain = LLMChain(llm=llm, prompt=general_insights_prompt)
 # Optional: Clean up function to remove incorrect COLLATE NOCASE usage
 def clean_sql_query(query):
@@ -130,7 +131,7 @@ def classify_query(question):
     Category (SQL/INSIGHTS):
     """
     classification_prompt = PromptTemplate(template=classification_template, input_variables=['question'])
-    classification_chain = LLMChain(llm=llm, prompt=classification_prompt)
     category = classification_chain.run({'question': question}).strip().upper()
     if category.startswith('SQL'):
         return 'SQL'
@@ -140,17 +141,7 @@ def classify_query(question):
 # Function to generate dataset summary
 def generate_dataset_summary(data):
     """Generate a summary of the dataset for general insights."""
-    summary_template = """
-    You are an expert data scientist. Based on the dataset provided below, generate a concise summary that includes the number of records, number of columns, data types, and any notable features.
-    Dataset:
-    {data}
-    Dataset Summary:
-    """
-    summary_prompt = PromptTemplate(template=summary_template, input_variables=['data'])
-    summary_chain = LLMChain(llm=llm, prompt=summary_prompt)
-    summary = summary_chain.run({'data': data.head().to_string(index=False)})
     return summary
 # Define the callback function
@@ -178,21 +169,9 @@ def process_input():
                 }).strip()
                 if generated_sql.upper() == "NO_SQL":
-                    # Handle cases where no SQL should be generated
-                    assistant_response = "Sure, let's discuss some general insights and recommendations based on the data."
-                    # Generate dataset summary
-                    dataset_summary = generate_dataset_summary(data)
-                    # Generate general insights and recommendations
-                    general_insights = general_insights_chain.run({
-                        'dataset_summary': dataset_summary
-                    })
-                    # Append the assistant's insights to the history
-                    st.session_state.history.append({"role": "assistant", "content": general_insights})
                 else:
-                    # Clean the SQL query
                     cleaned_sql = clean_sql_query(generated_sql)
                     logging.info(f"Generated SQL Query: {cleaned_sql}")
@@ -204,35 +183,18 @@ def process_input():
                             assistant_response = "The query returned no results. Please try a different question."
                             st.session_state.history.append({"role": "assistant", "content": assistant_response})
                         else:
-                            # Convert the result to a string for the insights prompt
-                            result_str = result.head(10).to_string(index=False)  # Limit to first 10 rows
-                            # Generate insights and recommendations based on the query result
-                            insights = insights_chain.run({
-                                'question': user_prompt,
-                                'result': result_str
-                            })
-                            # Append the assistant's insights to the history
-                            st.session_state.history.append({"role": "assistant", "content": insights})
-                            # Append the result DataFrame to the history
                             st.session_state.history.append({"role": "assistant", "content": result})
                     except Exception as e:
                         logging.error(f"An error occurred during SQL execution: {e}")
                         assistant_response = f"Error executing SQL query: {e}"
                         st.session_state.history.append({"role": "assistant", "content": assistant_response})
             else:  # INSIGHTS category
-                # Generate dataset summary
                 dataset_summary = generate_dataset_summary(data)
-                # Generate general insights and recommendations
-                general_insights = general_insights_chain.run({
-                    'dataset_summary': dataset_summary
-                })
-                # Append the assistant's insights to the history
-                st.session_state.history.append({"role": "assistant", "content": general_insights})
         except Exception as e:
             logging.error(f"An error occurred: {e}")
             assistant_response = f"Error: {e}"

 import pandas as pd
 import sqlite3
 from langchain import OpenAI, LLMChain, PromptTemplate
+from transformers import LlamaForCausalLM, LlamaTokenizer
+import torch
 import sqlparse
 import logging
     st.session_state.history = []
 # OpenAI API key (ensure it is securely stored)
 openai_api_key = os.getenv("OPENAI_API_KEY")
 # Check if the API key is set
     st.error("OpenAI API key is not set. Please set the OPENAI_API_KEY environment variable.")
     st.stop()
+# Load the LLaMA model and tokenizer
+model_name = "huggingface/llama"  # Replace with the actual LLaMA model name you want to use
+device = "cuda" if torch.cuda.is_available() else "cpu"
+llama_tokenizer = LlamaTokenizer.from_pretrained(model_name)
+llama_model = LlamaForCausalLM.from_pretrained(model_name).to(device)
+# Function to generate responses using LLaMA
+def generate_llama_response(prompt):
+    inputs = llama_tokenizer(prompt, return_tensors="pt").to(device)
+    outputs = llama_model.generate(inputs.input_ids, max_length=200)
+    return llama_tokenizer.decode(outputs[0], skip_special_tokens=True)
 # Step 1: Upload CSV data file (or use default)
 st.title("Natural Language to SQL Query App with Enhanced Insights")
 st.write("Upload a CSV file to get started, or use the default dataset.")
 valid_columns = list(data.columns)
 st.write(f"Valid columns: {valid_columns}")
+# Step 3: Set up the LLM Chains (SQL generation with OpenAI, insights with LLaMA)
+# SQL Generation Chain with OpenAI
 sql_template = """
 You are an expert data scientist. Given a natural language question, the name of the table, and a list of valid columns, generate a valid SQL query that answers the question.
 SQL Query:
 """
 sql_prompt = PromptTemplate(template=sql_template, input_variables=['question', 'table_name', 'columns'])
+sql_llm = OpenAI(temperature=0, openai_api_key=openai_api_key, max_tokens=180)
+sql_generation_chain = LLMChain(llm=sql_llm, prompt=sql_prompt)
+# General Insights and Recommendations Chain with LLaMA
+def generate_insights_llama(question, data_summary):
+    insights_template = f"""
+    You are an expert data scientist. Based on the user's question and the dataset summary provided below, generate concise data insights and actionable recommendations.
+    User's Question: {question}
+    Dataset Summary:
+    {data_summary}
+    Concise Insights and Recommendations:
+    """
+    return generate_llama_response(insights_template)
 # Optional: Clean up function to remove incorrect COLLATE NOCASE usage
 def clean_sql_query(query):
     Category (SQL/INSIGHTS):
     """
     classification_prompt = PromptTemplate(template=classification_template, input_variables=['question'])
+    classification_chain = LLMChain(llm=sql_llm, prompt=classification_prompt)
     category = classification_chain.run({'question': question}).strip().upper()
     if category.startswith('SQL'):
         return 'SQL'
 # Function to generate dataset summary
 def generate_dataset_summary(data):
     """Generate a summary of the dataset for general insights."""
+    summary = f"Number of records: {len(data)}, Number of columns: {len(data.columns)}, Columns: {list(data.columns)}"
     return summary
 # Define the callback function
                 }).strip()
                 if generated_sql.upper() == "NO_SQL":
+                    assistant_response = "No SQL query could be generated."
+                    st.session_state.history.append({"role": "assistant", "content": assistant_response})
                 else:
                     cleaned_sql = clean_sql_query(generated_sql)
                     logging.info(f"Generated SQL Query: {cleaned_sql}")
                             assistant_response = "The query returned no results. Please try a different question."
                             st.session_state.history.append({"role": "assistant", "content": assistant_response})
                         else:
+                            # Display query results
                             st.session_state.history.append({"role": "assistant", "content": result})
                     except Exception as e:
                         logging.error(f"An error occurred during SQL execution: {e}")
                         assistant_response = f"Error executing SQL query: {e}"
                         st.session_state.history.append({"role": "assistant", "content": assistant_response})
             else:  # INSIGHTS category
                 dataset_summary = generate_dataset_summary(data)
+                insights = generate_insights_llama(user_prompt, dataset_summary)
+                st.session_state.history.append({"role": "assistant", "content": insights})
         except Exception as e:
             logging.error(f"An error occurred: {e}")
             assistant_response = f"Error: {e}"