from typing import Type

import openmeteo_requests
import pandas as pd
import requests_cache
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
from retry_requests import retry

# Daily variables requested from the archive API. The response exposes them
# by position only, so this single ordered tuple drives both the request and
# the decoding of the response.
_DAILY_VARIABLES = (
    "temperature_2m_max",
    "temperature_2m_min",
    "sunshine_duration",
    "precipitation_sum",
)


class MyCustomToolInput(BaseModel):
    """Input schema for MyCustomTool."""

    argument: str = Field(..., description="Description of the argument.")


class MyCustomTool(BaseTool):
    """Template tool; returns a fixed placeholder string."""

    name: str = "Name of my tool"
    description: str = (
        "Clear description for what this tool is useful for, your agent will need this information to use it."
    )
    args_schema: Type[BaseModel] = MyCustomToolInput

    def _run(self, argument: str) -> str:
        """Return a placeholder output; ``argument`` is not used."""
        # Implementation goes here
        return "this is an example of a tool output, ignore it and move along."


class rgp_cultures_regions(BaseTool):
    """Tool returning the largest cultivated-area rows for one region."""

    name: str = "rgp_cultures_regions"
    description: str = (
        "Extraite les cultures les plus populaires en terme de surface cultuvé "
        "en hectare pour une région donnée en france métropolitaine"
    )

    def _run(self, region: str) -> pd.DataFrame:
        """Return the ten rows with the largest ``SURF_PARC`` for ``region``.

        The data is read from ``data/data_rpg/data_prepared_rpg.csv``
        (relative to the current working directory) on every call.

        Columns of the returned frame, as described by the original author:
            CODE_CULTU: code of the culture
            REGION: name of the region (lower-cased by this method)
            SURF_PARC: cultivated area in hectares
            CODE_CULTURE: code of the culture
            LIBELLE_CULTURE: name of the culture
            LIBELLE_GROUPE_CULTURE: name of the culture group
            ACTIF: "oui" or "non"

        Args:
            region: Name of a region in metropolitan France. Matching is
                case-insensitive and ignores surrounding whitespace.

        Returns:
            At most ten rows for the region, sorted by ``SURF_PARC`` in
            descending order. Empty when no row matches.
        """
        df_rpg = pd.read_csv("data/data_rpg/data_prepared_rpg.csv")

        # Compare in lower case on both sides; strip the argument because
        # callers may pass padded text.
        df_rpg["REGION"] = df_rpg["REGION"].str.lower()
        wanted_region = region.strip().lower()
        df_rpg = df_rpg[df_rpg["REGION"] == wanted_region]

        # Keep only the ten rows with the largest cultivated area.
        return df_rpg.sort_values(by="SURF_PARC", ascending=False).head(10)


def _aggregate_monthly(daily_dataframe: pd.DataFrame) -> pd.DataFrame:
    """Collapse a daily weather frame into one row per calendar month.

    Expects a datetime ``date`` column plus ``temperature_2m_max``,
    ``temperature_2m_min``, ``precipitation_sum`` and ``sunshine_duration``.
    Temperatures are averaged; precipitation and sunshine are summed.
    """
    dates = daily_dataframe["date"]
    enriched = daily_dataframe.assign(
        month=dates.dt.month,
        year=dates.dt.year,
        # Vectorised formatting instead of a row-wise ``apply``.
        year_month=dates.dt.strftime("%Y%m"),
    )
    return (
        enriched.groupby(["year_month", "year", "month"])
        .agg(
            temperature_2m_max=("temperature_2m_max", "mean"),
            temperature_2m_min=("temperature_2m_min", "mean"),
            precipitation_sum=("precipitation_sum", "sum"),
            # NOTE: the misspelt output column name is kept on purpose;
            # it is part of the frame consumers already receive.
            sunchine_duration=("sunshine_duration", "sum"),
        )
        .reset_index()
    )


class MeteoTool(BaseTool):
    """Tool returning monthly weather aggregates for a GPS location."""

    name: str = "meteo_tool"
    description: str = (
        "Extraite les données météorologiques des dix dernières années pour la "
        "région identifiée en utilisant des localisations GPS du centre de la "
        "région"
    )

    def _run(self, latitude: float, longitude: float) -> pd.DataFrame:
        """Fetch 2015-2024 daily weather and aggregate it per month.

        Queries the Open-Meteo archive endpoint for the given coordinates
        between 2015-01-01 and 2024-12-31, prints the location metadata
        returned by the service, and aggregates the daily values by month.

        Columns of the returned frame:
            year_month: year and month formatted as ``YYYYMM``
            year: calendar year
            month: calendar month number
            temperature_2m_max: mean of the daily maximum temperatures
            temperature_2m_min: mean of the daily minimum temperatures
            precipitation_sum: sum of the daily precipitation
            sunchine_duration: sum of the daily sunshine duration, after
                dividing the raw values by 3600 (hours, assuming the API
                reports seconds)

        Args:
            latitude: Latitude of the center of the region.
            longitude: Longitude of the center of the region.

        Returns:
            One row per (year_month, year, month) group.
        """
        # HTTP session with an on-disk cache (expire_after=300) and retries.
        cache_session = requests_cache.CachedSession('.cache', expire_after=300)
        retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
        openmeteo = openmeteo_requests.Client(session=retry_session)

        url = "https://archive-api.open-meteo.com/v1/archive"
        params = {
            "latitude": latitude,
            "longitude": longitude,
            "start_date": "2015-01-01",
            "end_date": "2024-12-31",
            "daily": list(_DAILY_VARIABLES),
        }
        responses = openmeteo.weather_api(url, params=params)

        # Only one location is requested, so only the first response is used.
        response = responses[0]
        print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
        print(f"Elevation {response.Elevation()} m asl")
        print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
        print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

        daily = response.Daily()
        daily_data = {
            "date": pd.date_range(
                start=pd.to_datetime(daily.Time(), unit="s", utc=True),
                end=pd.to_datetime(daily.TimeEnd(), unit="s", utc=True),
                freq=pd.Timedelta(seconds=daily.Interval()),
                inclusive="left",
            )
        }
        # Variables come back in the same order as they were requested.
        for position, variable in enumerate(_DAILY_VARIABLES):
            daily_data[variable] = daily.Variables(position).ValuesAsNumpy()

        daily_dataframe = pd.DataFrame(data=daily_data)
        daily_dataframe["sunshine_duration"] = (
            daily_dataframe["sunshine_duration"] / 3600
        )

        return _aggregate_monthly(daily_dataframe)