"""ZEN-Analyser: upload a CSV and chat with it via a LangChain pandas agent.

A Gradio app that loads a user-supplied CSV into a pandas DataFrame, wraps it
in a LangChain pandas-dataframe agent backed by Google Gemini, and answers
free-text analytical questions about the data.
"""

import os

import gradio as gr
import pandas as pd
from langchain_core.prompts import PromptTemplate
from langchain_experimental.agents import create_pandas_dataframe_agent
from langchain_google_genai import ChatGoogleGenerativeAI

# SECURITY: never commit a real API key. Prefer setting GOOGLE_API_KEY in the
# environment; setdefault keeps the fallback only when no key is already set
# (the original unconditionally overwrote any externally configured key).
os.environ.setdefault("GOOGLE_API_KEY", "AIzaSyDSorjiEVV2KCWelkDLFxQsju3KDQOF344")  # TODO: remove hard-coded key

# LLM backing the dataframe agent.
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash")

# Module-level state: the active agent and the DataFrame it was built from.
# (None until a CSV has been uploaded.)
agent = None
df = None

# Analyst system prompt. Built once at import time — the original rebuilt it
# on every query AND never actually used it (it invoked the agent with the
# raw query); that dead-code bug is fixed in answer_query below.
ANALYST_PROMPT = PromptTemplate.from_template(
    """
    Please act as a data analyst and respond to my queries with insights from the provided dataset.
    If your response involves numeric data or comparisons, format the answer in a clear tabular form
    whenever it enhances readability and clarity. Provide analyses that highlight trends, patterns,
    and notable details in the data, and use tabular format for presenting summaries, comparisons,
    or grouped data and whenever user asks listing or something similar to help illustrate your
    findings effectively. Additionally, interpret any findings with context and data-driven
    reasoning as a skilled data analyst would.
    Also make sure not to give any data that is not asked by the user or not relevant to the given context.
    Keep the above said details in mind and answer the below query:
    Query: {query}
    """
)


def handle_file_upload(file):
    """Load an uploaded CSV and (re)build the LangChain dataframe agent.

    Parameters
    ----------
    file : gradio file object (has a ``.name`` path attribute), or None.

    Returns
    -------
    tuple[str, pandas.DataFrame | None]
        A status message for the UI and the loaded DataFrame (None on error).
    """
    global agent, df

    # Gradio passes None when the file component is cleared.
    if file is None or not file.name.endswith(".csv"):
        return "Error: Please upload a valid CSV file.", None

    try:
        df = pd.read_csv(file)  # read directly from the uploaded file object
    except Exception as e:  # surface parse errors to the UI rather than crash
        return f"Error reading CSV file: {e}", None

    # allow_dangerous_code is required by the pandas agent (it executes
    # model-generated Python) — only safe because the CSV is user-local.
    agent = create_pandas_dataframe_agent(llm, df, verbose=True, allow_dangerous_code=True)
    return "CSV uploaded successfully. You can now ask questions about the data.", df


def answer_query(query):
    """Answer a user question about the uploaded CSV via the agent.

    Parameters
    ----------
    query : str
        Free-text question from the UI.

    Returns
    -------
    str | pandas.DataFrame
        Markdown-formatted answer, or a DataFrame for tabular display.
    """
    if agent is None:
        return "Please upload a CSV file first."

    # BUGFIX: the original built the analyst prompt but invoked the agent
    # with the bare query, so the prompt was never applied. Format it here.
    response = agent.invoke(ANALYST_PROMPT.format(query=query))

    # A DataFrame result is rendered as a table by Gradio directly.
    if isinstance(response, pd.DataFrame):
        return response
    # Agent .invoke normally returns a dict with the answer under "output".
    return f"**Response:**\n\n{response['output']}"


# ---------------------------------------------------------------------------
# Gradio UI wiring
# ---------------------------------------------------------------------------
with gr.Blocks() as iface:
    gr.Markdown("# ZEN-Analyser")
    gr.Markdown("Upload a CSV file to view the data and ask questions about it.")

    file_input = gr.File(label="Upload CSV", file_types=[".csv"])
    data_output = gr.DataFrame(label="Uploaded Data")
    query_input = gr.Textbox(label="Enter your query")
    response_output = gr.Markdown(label="Response")
    query_button = gr.Button("Submit Query")

    # Upload populates both the status message and the data preview.
    file_input.upload(handle_file_upload, file_input, [response_output, data_output])
    query_button.click(answer_query, query_input, response_output)


if __name__ == "__main__":
    iface.launch()