import gradio as gr

# cell 1
from typing import Annotated
from langchain_experimental.tools import PythonAstREPLTool
import pandas as pd
import statsmodels.api as sm
import os

# df = pd.read_csv("HOUST.csv")
df = pd.read_csv("USSTHPI.csv")
python_repl_tool = PythonAstREPLTool(locals={"df": df})
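# (The tool above runs LLM-generated Python with `df` already bound in its local namespace,
# so agent code can reference the dataframe directly instead of re-reading the CSV.)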

import matplotlib.pyplot as plt

def gen_plot(name):
    # `name` is unused; it only exists so this function can be wired to a Gradio text input.
    os.makedirs("plots", exist_ok=True)
    plt.figure(figsize=(10, 5))
    plt.plot(df['DATE'], df['USSTHPI'], label='USSTHPI')
    plt.title('Time Series of USSTHPI')
    plt.xlabel('Date')
    plt.ylabel('USSTHPI')
    plt.legend()
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig('plots/plot.png')
    return "plots/plot.png"

# cell 2
from langchain.agents import AgentExecutor, create_openai_tools_agent
from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder, HumanMessagePromptTemplate
import functools
import operator
from typing import Sequence, TypedDict

system_prompt = """You are working with a pandas dataframe in Python. The name of the dataframe is `df`.
                It is important to understand the attributes of the dataframe before working with it. This is the result of running `df.head().to_markdown()`

                <df>
                {dhead}
                </df>

                You are not meant to use only these rows to answer questions - they are meant as a way of telling you about the shape and schema of the dataframe.
                You also do not have to use only the information here to answer questions - you can run intermediate queries to do exploratory data analysis to give you more information as needed. """
system_prompt = system_prompt.format(dhead=df.head().to_markdown())

# The agent state is the input to each node in the graph
class AgentState(TypedDict):
    # The annotation tells the graph that new messages will always be appended to the current state
    messages: Annotated[Sequence[BaseMessage], operator.add]
    # The 'next' field indicates where to route to next
    next: str
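
# Because `messages` is annotated with operator.add, LangGraph treats it as an append-only
# channel: when a node returns {"messages": [new_msg]}, the graph merges it as
# existing_messages + [new_msg] instead of overwriting the list. Keys a node does not
# return (e.g. `next`) are left unchanged.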

# note: system_prompt is already a formatted string at this point (not a PromptTemplate object),
# so it can be passed directly as the "system" message below
# not everything needs to be an openai tools agent
def create_agent(llm: ChatOpenAI, tools: list, task: str):
    # Each worker node will be given a name and some tools.
    prompt = ChatPromptTemplate.from_messages(
        [
            ( "system", system_prompt, ), # using a global system_prompt
            HumanMessage(content=task),
            MessagesPlaceholder(variable_name="messages"),
            MessagesPlaceholder(variable_name="agent_scratchpad"),
        ]
    )
    agent = create_openai_tools_agent(llm, tools, prompt)
    # for debugging
    executor = AgentExecutor(agent=agent, tools=tools, verbose=True, return_intermediate_steps=True)
    return executor
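
# Note: create_openai_tools_agent expects the prompt to include an "agent_scratchpad"
# MessagesPlaceholder (that is where prior tool calls and tool outputs are injected), which is
# why the placeholder appears in create_agent even though nothing here fills it manually.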

# AIMessage will have all kinds of metadata, so treat it all as HumanMessage I suppose?
def agent_node(state: AgentState, agent, name):
    result = agent.invoke(state)
    # AgentExecutor.invoke returns a dict (with "output" and "intermediate_steps"),
    # so wrap the final answer in a message before appending it to state.
    return {"messages": [HumanMessage(content=result["output"], name=name)]}

# The returned dict is merged into state automatically by the graph (via the operator.add
# reducer on `messages` above), so the node only needs to return the new message.
def chain_node(state: AgentState, chain, name):
    result = chain.invoke(input={"detail": "medium", "messages": state["messages"]})
    return {"messages": [HumanMessage(content=result.content, name=name)]}

# cell 3
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
llm = ChatOpenAI(model="gpt-4o-mini-2024-07-18", temperature=0, api_key=OPENAI_API_KEY)
llm_big = ChatOpenAI(model="gpt-4o", temperature=0, api_key=OPENAI_API_KEY)

eda_task = """Using the data in the dataframe `df` and the package statsmodels, first run an augmented dickey fuller test on the data.
            Using matplotlib plot the time series, display it and save it to 'plots/plot.png'.
            Next use the statsmodel package to generate an ACF plot with zero flag set to False, display it and save it to 'plots/acf.png'.
            Then use the statsmodel package to generate a PACF plot with zero flag set to False, display it and save it to 'plots/pacf.png'"""
eda_agent = create_agent(llm, [python_repl_tool], task=eda_task,)
eda_node = functools.partial(agent_node, agent=eda_agent, name="EDA")
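
# Hedged reference sketch (not part of the original app, and never called at runtime): roughly the
# statsmodels/matplotlib calls the EDA agent is expected to produce through the REPL tool for
# eda_task above. Column names assume the USSTHPI.csv schema already used in gen_plot.
def reference_eda(frame):
    from statsmodels.tsa.stattools import adfuller
    from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

    os.makedirs("plots", exist_ok=True)
    series = frame["USSTHPI"]

    # Augmented Dickey-Fuller test; the second element of the result tuple is the p-value.
    adf_stat, p_value, *_ = adfuller(series)
    print(f"ADF statistic: {adf_stat:.4f}, p-value: {p_value:.4f}")

    # ACF / PACF plots with the zero-lag bar suppressed, matching the task wording.
    plot_acf(series, zero=False).savefig("plots/acf.png")
    plot_pacf(series, zero=False).savefig("plots/pacf.png")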

from langgraph.graph import END, StateGraph, START

# add a chain to the node to analyze the ACF plot?
workflow = StateGraph(AgentState)
workflow.add_node("EDA", eda_node)

# conditional_edge to refit and the loop refit with resid?
workflow.add_edge(START, "EDA")
workflow.add_edge("EDA", END)

graph = workflow.compile()
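
# Hedged sketch of the "add a chain to analyze the ACF plot" idea noted above: an alternative
# graph builder (defined here but not used) that routes the EDA output through an LCEL chain
# node before ending. The prompt wording is illustrative, not taken from the original notebook.
def build_graph_with_acf_analysis():
    analysis_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", "You are a time-series analyst. Give a {detail}-detail interpretation of the ACF/PACF results in the conversation."),
            MessagesPlaceholder(variable_name="messages"),
        ]
    )
    # chain_node invokes the chain with {"detail": ..., "messages": ...}, which matches this prompt.
    analysis_chain = analysis_prompt | llm_big
    analysis_node = functools.partial(chain_node, chain=analysis_chain, name="ACF_ANALYSIS")

    wf = StateGraph(AgentState)
    wf.add_node("EDA", eda_node)
    wf.add_node("ACF_ANALYSIS", analysis_node)
    wf.add_edge(START, "EDA")
    wf.add_edge("EDA", "ACF_ANALYSIS")
    wf.add_edge("ACF_ANALYSIS", END)
    return wf.compile()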

def greet(name):
    resp = graph.invoke({"messages": [HumanMessage(content="Run the analysis")]}, debug=True)
    return resp

# The interface is currently wired to gen_plot; pointing fn at greet instead would run the LangGraph workflow.
demo = gr.Interface(fn=gen_plot, inputs="text", outputs="text")
demo.launch()