gaia / chatbot_gaia /src /tools /custom_tool.py
Hugo Massonnat
move rpg data in the data folder
e76e66b
from typing import Type
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
import openmeteo_requests
import requests_cache
from retry_requests import retry
import pandas as pd
class MyCustomToolInput(BaseModel):
    """Argument schema accepted by ``MyCustomTool``."""

    # Single string argument; the ``...`` default marks the field as required.
    argument: str = Field(
        ...,
        description="Description of the argument.",
    )
class MyCustomTool(BaseTool):
    """Template tool whose ``_run`` returns a fixed placeholder string."""

    name: str = "Name of my tool"
    description: str = "Clear description for what this tool is useful for, your agent will need this information to use it."
    # Schema describing the arguments passed to ``_run``.
    args_schema: Type[BaseModel] = MyCustomToolInput

    def _run(self, argument: str) -> str:
        """Return the placeholder output.

        Args:
            argument: Accepted to satisfy the schema; not used by the body.

        Returns:
            A constant example string.
        """
        placeholder_output = "this is an example of a tool output, ignore it and move along."
        return placeholder_output
class rgp_cultures_regions(BaseTool):
    """Tool returning the ten largest entries by cultivated area for a region."""

    name: str = "rgp_cultures_regions"
    description: str = "Extraite les cultures les plus populaires en terme de surface cultuvé en hectare pour une région donnée en france métropolitaine"

    def _run(self, region: str) -> pd.DataFrame:
        """Return the ten rows with the largest ``SURF_PARC`` for ``region``.

        The prepared RPG CSV is read from ``data/data_rpg/data_prepared_rpg.csv``
        (relative to the current working directory), region names are compared
        case-insensitively, and the matching rows are ranked by cultivated area.

        Columns of the returned DataFrame, as described by the original author:
            CODE_CULTU: code of the culture
            REGION: name of the region (lower-cased by this method)
            SURF_PARC: cultivated area in hectares
            CODE_CULTURE: code of the culture
            LIBELLE_CULTURE: name of the culture
            LIBELLE_GROUPE_CULTURE: name of the culture group
            ACTIF: "oui" or "non"

        Args:
            region: Name of a region in metropolitan France.

        Returns:
            At most ten rows, sorted by ``SURF_PARC`` in descending order.
            The frame is empty when no row matches ``region``.
        """
        rpg_table = pd.read_csv("data/data_rpg/data_prepared_rpg.csv")

        # Lower-case both sides so the comparison ignores case; the lower-cased
        # region names are also what the caller sees in the result.
        rpg_table = rpg_table.assign(REGION=rpg_table["REGION"].str.lower())
        wanted_region = region.lower()

        in_region = rpg_table["REGION"] == wanted_region
        region_rows = rpg_table.loc[in_region]

        # Keep only the ten rows with the largest cultivated area.
        ranked_rows = region_rows.sort_values(by="SURF_PARC", ascending=False)
        return ranked_rows.head(10)
class MeteoTool(BaseTool):
    """Tool fetching daily weather from the Open-Meteo archive, aggregated by month."""

    name: str = "meteo_tool"
    description: str = "Extraite les données météorologiques des dix dernières années pour la région identifiée en utilisant des localisations GPS du centre de la région"

    def _run(self, latitude: float, longitude: float) -> pd.DataFrame:
        """Return monthly weather aggregates for 2015-01-01 through 2024-12-31.

        Daily values are requested from the Open-Meteo archive API for the given
        coordinates, then grouped by calendar month.

        Columns of the returned DataFrame:
            year_month: month key formatted as ``YYYYMM`` (string)
            year: calendar year
            month: calendar month number
            temperature_2m_max: monthly mean of the daily maximum temperature
                at 2 m (Celsius per the original documentation)
            temperature_2m_min: monthly mean of the daily minimum temperature
                at 2 m (Celsius per the original documentation)
            precipitation_sum: monthly total precipitation (mm per the original
                documentation)
            sunchine_duration: monthly total sunshine duration in hours

        Args:
            latitude: Latitude of the centre of the region.
            longitude: Longitude of the centre of the region.

        Returns:
            One row per (year, month) with the aggregates listed above.
        """
        # HTTP session with an on-disk cache ('.cache', entries expire after
        # 300 s) wrapped in a retry policy (5 retries, backoff factor 0.2).
        cache_session = requests_cache.CachedSession('.cache', expire_after=300)
        retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
        openmeteo = openmeteo_requests.Client(session=retry_session)

        # Single source of truth for the requested variables: the response
        # exposes them by position, in the same order as they were requested.
        daily_variables = [
            "temperature_2m_max",
            "temperature_2m_min",
            "sunshine_duration",
            "precipitation_sum",
        ]
        url = "https://archive-api.open-meteo.com/v1/archive"
        params = {
            "latitude": latitude,
            "longitude": longitude,
            "start_date": "2015-01-01",
            "end_date": "2024-12-31",
            "daily": daily_variables,
        }
        responses = openmeteo.weather_api(url, params=params)

        # Only one location is requested, so only the first response is used.
        response = responses[0]
        print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
        print(f"Elevation {response.Elevation()} m asl")
        print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
        print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

        daily = response.Daily()

        # Rebuild the daily timestamps (UTC) from the start, end and step
        # reported by the response; the end bound is excluded.
        daily_data = {
            "date": pd.date_range(
                start=pd.to_datetime(daily.Time(), unit="s", utc=True),
                end=pd.to_datetime(daily.TimeEnd(), unit="s", utc=True),
                freq=pd.Timedelta(seconds=daily.Interval()),
                inclusive="left",
            )
        }
        for position, variable in enumerate(daily_variables):
            daily_data[variable] = daily.Variables(position).ValuesAsNumpy()
        daily_dataframe = pd.DataFrame(data=daily_data)

        # Sunshine duration is divided by 3600 to express it in hours.
        daily_dataframe["sunshine_duration"] = daily_dataframe["sunshine_duration"] / 3600

        # Month keys, computed with vectorised datetime accessors rather than
        # a row-by-row apply.
        dates = daily_dataframe["date"]
        daily_dataframe["month"] = dates.dt.month
        daily_dataframe["year"] = dates.dt.year
        daily_dataframe["year_month"] = dates.dt.strftime("%Y%m")

        # Temperatures are averaged over the month; precipitation and sunshine
        # are summed.
        # NOTE: the output column is spelled "sunchine_duration"; the spelling
        # is kept because callers may rely on it.
        monthly_dataframe = daily_dataframe.groupby(["year_month", "year", "month"]).agg(
            temperature_2m_max=("temperature_2m_max", "mean"),
            temperature_2m_min=("temperature_2m_min", "mean"),
            precipitation_sum=("precipitation_sum", "sum"),
            sunchine_duration=("sunshine_duration", "sum"),
        ).reset_index()
        return monthly_dataframe