pwilczewski committed on
Commit 6589e60 · 1 Parent(s): 0169c8b
Files changed (2)
  1. app.py +88 -1
  2. requirements.txt +184 -1
app.py CHANGED
@@ -1,7 +1,94 @@
 import gradio as gr
 
+# cell 1
+from typing import Annotated
+from langchain_experimental.tools import PythonREPLTool, PythonAstREPLTool
+import pandas as pd
+import statsmodels as sm
+
+# df = pd.read_csv("HOUST.csv")
+df = pd.read_csv("USSTHPI.csv")
+python_repl_tool = PythonAstREPLTool(locals={"df": df})
+
+# cell 2
+from langchain.agents import AgentExecutor, create_openai_tools_agent
+from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
+from langchain_openai import ChatOpenAI
+from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder, HumanMessagePromptTemplate
+import functools
+import operator
+from typing import Sequence, TypedDict
+
+system_prompt = """You are working with a pandas dataframe in Python. The name of the dataframe is `df`.
+It is important to understand the attributes of the dataframe before working with it. This is the result of running `df.head().to_markdown()`:
+
+<df>
+{dhead}
+</df>
+
+You are not meant to use only these rows to answer questions - they are meant as a way of telling you about the shape and schema of the dataframe.
+You also do not have to use only the information here to answer questions - you can run intermediate queries to do exploratory data analysis to give you more information as needed."""
+system_prompt = system_prompt.format(dhead=df.head().to_markdown())
+
+# The agent state is the input to each node in the graph
+class AgentState(TypedDict):
+    # The annotation tells the graph that new messages will always be added to the current state
+    messages: Annotated[Sequence[BaseMessage], operator.add]
+    # The 'next' field indicates where to route to next
+    next: str
+
+# part of the problem might be that I'm passing a PromptTemplate object for the system_prompt here
+# not everything needs to be an openai tools agent
+def create_agent(llm: ChatOpenAI, tools: list, task: str):
+    # Each worker node will be given a name and some tools.
+    prompt = ChatPromptTemplate.from_messages(
+        [
+            ("system", system_prompt),  # using a global system_prompt
+            HumanMessage(content=task),
+            MessagesPlaceholder(variable_name="messages"),
+            MessagesPlaceholder(variable_name="agent_scratchpad"),
+        ]
+    )
+    agent = create_openai_tools_agent(llm, tools, prompt)
+    executor = AgentExecutor(agent=agent, tools=tools)
+    return executor
+
+# AIMessage will have all kinds of metadata, so treat it all as HumanMessage I suppose?
+def agent_node(state: AgentState, agent, name):
+    result = agent.invoke(state)
+    return {"messages": [HumanMessage(content=result["output"], name=name)]}
+
+# I need to write the message to state here? or is that handled automatically?
+def chain_node(state: AgentState, chain, name):
+
+    result = chain.invoke(input={"detail": "medium", "messages": state["messages"]})
+    return {"messages": [HumanMessage(content=result.content, name=name)]}
+
+# cell 3
+llm = ChatOpenAI(model="gpt-4o-mini-2024-07-18", temperature=0)
+llm_big = ChatOpenAI(model="gpt-4o", temperature=0)
+
+eda_task = """Using the data in the dataframe `df` and the package statsmodels, first run an augmented Dickey-Fuller test on the data.
+Using matplotlib plot the time series, display it and save it to 'plot.png'.
+Next use the statsmodels package to generate an ACF plot with the zero flag set to False, display it and save it to 'acf.png'.
+Then use the statsmodels package to generate a PACF plot with the zero flag set to False, display it and save it to 'pacf.png'."""
+eda_agent = create_agent(llm, [python_repl_tool], task=eda_task)
+eda_node = functools.partial(agent_node, agent=eda_agent, name="EDA")
+
+from langgraph.graph import END, StateGraph, START
+
+# add a chain to the node to analyze the ACF plot?
+workflow = StateGraph(AgentState)
+workflow.add_node("EDA", eda_node)
+
+# conditional_edge to refit and the loop refit with resid?
+workflow.add_edge(START, "EDA")
+workflow.add_edge("EDA", END)
+
+graph = workflow.compile()
+
 def greet(name):
-    return "Hello " + name + "!!"
+    return graph.invoke({"messages": [HumanMessage(content="Run the analysis")]})
 
 demo = gr.Interface(fn=greet, inputs="text", outputs="text")
 demo.launch()
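
A note on the new `greet`: it ignores the `name` argument from the Gradio textbox and returns the raw graph state (a dict), so the output box ends up showing the whole state rather than the analysis text. A minimal sketch of one way to pass the user's input through and return only the final message, as an assumption about the intended behavior rather than part of this commit:

def greet(name):
    # hypothetical variant (not in the commit): forward the user's text into the graph state
    result = graph.invoke({"messages": [HumanMessage(content=name)]})
    # return only the content of the last message so the text output stays readable
    return result["messages"][-1].content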
requirements.txt CHANGED
@@ -1 +1,184 @@
-pandas
+# This file may be used to create an environment using:
+# $ conda create --name <env> --file <this file>
+# platform: win-64
+aiofiles=23.2.1=pypi_0
+aiohappyeyeballs=2.4.0=pypi_0
+aiohttp=3.10.5=pypi_0
+aiosignal=1.3.1=pypi_0
+annotated-types=0.7.0=pypi_0
+anyio=4.4.0=pypi_0
+asttokens=2.4.1=pyhd8ed1ab_0
+attrs=24.2.0=pypi_0
+blas=1.0=mkl
+bottleneck=1.3.7=py312he558020_0
+brotli=1.0.9=h2bbff1b_8
+brotli-bin=1.0.9=h2bbff1b_8
+bzip2=1.0.8=h2bbff1b_6
+ca-certificates=2024.7.2=haa95532_0
+certifi=2024.8.30=pypi_0
+charset-normalizer=3.3.2=pypi_0
+click=8.1.7=pypi_0
+colorama=0.4.6=pyhd8ed1ab_0
+comm=0.2.2=pyhd8ed1ab_0
+contourpy=1.2.0=py312h59b6b97_0
+cycler=0.11.0=pyhd3eb1b0_0
+dataclasses-json=0.6.7=pypi_0
+debugpy=1.6.7=py312hd77b12b_0
+decorator=5.1.1=pyhd8ed1ab_0
+distro=1.9.0=pypi_0
+exceptiongroup=1.2.2=pyhd8ed1ab_0
+executing=2.1.0=pyhd8ed1ab_0
+expat=2.6.2=hd77b12b_0
+fastapi=0.115.0=pypi_0
+ffmpy=0.4.0=pypi_0
+filelock=3.16.1=pypi_0
+fonttools=4.51.0=py312h2bbff1b_0
+freetype=2.12.1=ha860e81_0
+frozenlist=1.4.1=pypi_0
+fsspec=2024.9.0=pypi_0
+gradio=4.44.0=pypi_0
+gradio-client=1.3.0=pypi_0
+greenlet=3.0.3=pypi_0
+h11=0.14.0=pypi_0
+httpcore=1.0.5=pypi_0
+httpx=0.27.2=pypi_0
+huggingface-hub=0.25.1=pypi_0
+icc_rt=2022.1.0=h6049295_2
+icu=73.1=h6c2663c_0
+idna=3.8=pypi_0
+importlib-metadata=8.5.0=pyha770c72_0
+importlib-resources=6.4.5=pypi_0
+importlib_metadata=8.5.0=hd8ed1ab_0
+intel-openmp=2023.1.0=h59b6b97_46320
+ipykernel=6.29.5=pyh4bbf305_0
+ipython=8.27.0=pyh7428d3b_0
+jedi=0.19.1=pyhd8ed1ab_0
+jinja2=3.1.4=pypi_0
+jiter=0.5.0=pypi_0
+jpeg=9e=h827c3e9_3
+jsonpatch=1.33=pypi_0
+jsonpointer=3.0.0=pypi_0
+jupyter_client=8.6.2=pyhd8ed1ab_0
+jupyter_core=5.7.2=py312haa95532_0
+kiwisolver=1.4.4=py312hd77b12b_0
+krb5=1.20.1=h5b6d351_0
+langchain=0.3.0=pypi_0
+langchain-community=0.3.0=pypi_0
+langchain-core=0.3.1=pypi_0
+langchain-experimental=0.3.0=pypi_0
+langchain-openai=0.2.0=pypi_0
+langchain-text-splitters=0.3.0=pypi_0
+langgraph=0.2.22=pypi_0
+langgraph-checkpoint=1.0.10=pypi_0
+langsmith=0.1.121=pypi_0
+lcms2=2.12=h83e58a3_0
+lerc=3.0=hd77b12b_0
+libbrotlicommon=1.0.9=h2bbff1b_8
+libbrotlidec=1.0.9=h2bbff1b_8
+libbrotlienc=1.0.9=h2bbff1b_8
+libclang=14.0.6=default_hb5a9fac_1
+libclang13=14.0.6=default_h8e68704_1
+libdeflate=1.17=h2bbff1b_1
+libffi=3.4.4=hd77b12b_1
+libpng=1.6.39=h8cc25b3_0
+libpq=12.17=h906ac69_0
+libsodium=1.0.18=h8d14728_1
+libtiff=4.5.1=hd77b12b_0
+libwebp-base=1.3.2=h2bbff1b_0
+lz4-c=1.9.4=h2bbff1b_1
+markdown-it-py=3.0.0=pypi_0
+markupsafe=2.1.5=pypi_0
+marshmallow=3.22.0=pypi_0
+matplotlib=3.9.2=py312haa95532_0
+matplotlib-base=3.9.2=py312hbdc63d0_0
+matplotlib-inline=0.1.7=pyhd8ed1ab_0
+mdurl=0.1.2=pypi_0
+mkl=2023.1.0=h6b88ed4_46358
+mkl-service=2.4.0=py312h2bbff1b_1
+mkl_fft=1.3.10=py312h827c3e9_0
+mkl_random=1.2.7=py312h0158946_0
+msgpack=1.1.0=pypi_0
+multidict=6.0.5=pypi_0
+mypy-extensions=1.0.0=pypi_0
+nest-asyncio=1.6.0=pyhd8ed1ab_0
+numexpr=2.8.7=py312h96b7d27_0
+numpy=1.26.4=py312hfd52020_0
+numpy-base=1.26.4=py312h4dde369_0
+openai=1.46.0=pypi_0
+openjpeg=2.5.2=hae555c5_0
+openssl=3.0.15=h827c3e9_0
+orjson=3.10.7=pypi_0
+packaging=24.1=pyhd8ed1ab_0
+pandas=2.2.2=py312h0158946_0
+parso=0.8.4=pyhd8ed1ab_0
+patsy=0.5.6=py312haa95532_0
+pickleshare=0.7.5=py_1003
+pillow=10.4.0=py312h827c3e9_0
+pip=24.2=py312haa95532_0
+platformdirs=4.3.3=pyhd8ed1ab_0
+ply=3.11=py312haa95532_1
+prompt-toolkit=3.0.47=pyha770c72_0
+psutil=5.9.0=py312h2bbff1b_0
+pure_eval=0.2.3=pyhd8ed1ab_0
+pybind11-abi=5=hd3eb1b0_0
+pydantic=2.8.2=pypi_0
+pydantic-core=2.20.1=pypi_0
+pydantic-settings=2.5.2=pypi_0
+pydub=0.25.1=pypi_0
+pygments=2.18.0=pyhd8ed1ab_0
+pyparsing=3.1.2=py312haa95532_0
+pyqt=5.15.10=py312hd77b12b_0
+pyqt5-sip=12.13.0=py312h2bbff1b_0
+python=3.12.4=h14ffc60_1
+python-dateutil=2.9.0post0=py312haa95532_2
+python-dotenv=1.0.1=pypi_0
+python-multipart=0.0.10=pypi_0
+python-tzdata=2023.3=pyhd3eb1b0_0
+pytz=2024.1=py312haa95532_0
+pywin32=305=py312h2bbff1b_0
+pyyaml=6.0.2=pypi_0
+pyzmq=25.1.2=py312hd77b12b_0
+qt-main=5.15.2=h19c9488_10
+regex=2024.9.11=pypi_0
+requests=2.32.3=pypi_0
+rich=13.8.1=pypi_0
+ruff=0.6.7=pypi_0
+scipy=1.13.1=py312hbb039d4_0
+semantic-version=2.10.0=pypi_0
+setuptools=72.1.0=py312haa95532_0
+shellingham=1.5.4=pypi_0
+sip=6.7.12=py312hd77b12b_0
+six=1.16.0=pyhd3eb1b0_1
+sniffio=1.3.1=pypi_0
+sqlalchemy=2.0.33=pypi_0
+sqlite=3.45.3=h2bbff1b_0
+stack_data=0.6.2=pyhd8ed1ab_0
+starlette=0.38.6=pypi_0
+statsmodels=0.14.2=py312h4b0e54e_0
+tabulate=0.9.0=py312haa95532_0
+tbb=2021.8.0=h59b6b97_0
+tenacity=8.5.0=pypi_0
+tiktoken=0.7.0=pypi_0
+tk=8.6.14=h0416ee5_0
+tomlkit=0.12.0=pypi_0
+tornado=6.4.1=py312h827c3e9_0
+tqdm=4.66.5=pypi_0
+traitlets=5.14.3=pyhd8ed1ab_0
+typer=0.12.5=pypi_0
+typing-inspect=0.9.0=pypi_0
+typing_extensions=4.12.2=pyha770c72_0
+tzdata=2024a=h04d1e81_0
+unicodedata2=15.1.0=py312h2bbff1b_0
+urllib3=2.2.2=pypi_0
+uvicorn=0.30.6=pypi_0
+vc=14.40=h2eaa2aa_0
+vs2015_runtime=14.40.33807=h98bb1dd_0
+wcwidth=0.2.13=pyhd8ed1ab_0
+websockets=12.0=pypi_0
+wheel=0.43.0=py312haa95532_0
+xz=5.4.6=h8cc25b3_1
+yarl=1.9.7=pypi_0
+zeromq=4.3.5=hd77b12b_0
+zipp=3.20.2=pyhd8ed1ab_0
+zlib=1.2.13=h8cc25b3_1
+zstd=1.5.5=hd43e919_2
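
One caveat on the new requirements.txt: it is a conda export for win-64 (note the name=version=build triplets and conda-only entries such as vc, mkl, and qt-main), while Gradio Spaces install requirements.txt with pip, which cannot parse that format. A pip-style file limited to the packages app.py actually imports might look roughly like the sketch below (versions copied from the listing above, untested assumption; tabulate is included because df.head().to_markdown() requires it):

gradio==4.44.0
pandas==2.2.2
tabulate==0.9.0
matplotlib==3.9.2
statsmodels==0.14.2
langchain==0.3.0
langchain-experimental==0.3.0
langchain-openai==0.2.0
langgraph==0.2.22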