Spaces:

hackathon-ombrea
/

gaia

Runtime error

File size: 6,222 Bytes

from typing import Type

from crewai.tools import BaseTool
from pydantic import BaseModel, Field

import openmeteo_requests
import requests_cache
from retry_requests import retry

import pandas as pd

# Pydantic model declaring the arguments accepted by MyCustomTool; it is wired
# in through MyCustomTool.args_schema.
class MyCustomToolInput(BaseModel):
    """Input schema for MyCustomTool."""

    # Single required string argument (the `...` default marks it as required).
    # The description text is still the template placeholder.
    argument: str = Field(..., description="Description of the argument.")


class MyCustomTool(BaseTool):
    # Scaffold tool: name, description and output are all placeholder text.
    # Its arguments are validated against MyCustomToolInput via args_schema.
    name: str = "Name of my tool"
    description: str = "Clear description for what this tool is useful for, your agent will need this information to use it."
    args_schema: Type[BaseModel] = MyCustomToolInput

    def _run(self, argument: str) -> str:
        """Return a fixed placeholder message.

        Args:
            argument: Accepted to satisfy the input schema; not used.

        Returns:
            A constant example string.
        """
        placeholder_output = "this is an example of a tool output, ignore it and move along."
        return placeholder_output

class rgp_cultures_regions(BaseTool):
    name: str = "rgp_cultures_regions"
    description: str = "Extraite les cultures les plus populaires en terme de surface cultuvé en hectare pour une région donnée en france métropolitaine"

    def _run(self, region: str) -> pd.DataFrame:
        """Return the ten rows with the largest cultivated area for one region.

        The prepared RPG CSV (``data/data_rpg/data_prepared_rpg.csv``, resolved
        against the current working directory) is read on every call. Rows are
        matched on the ``REGION`` column, ignoring letter case and
        leading/trailing whitespace, then sorted by ``SURF_PARC`` in
        descending order and truncated to the first ten.

        Columns of the returned DataFrame, as described by the original
        author of the prepared file (only ``REGION`` and ``SURF_PARC`` are
        used by this method):
            CODE_CULTU: code of the culture
            REGION: name of the region (lower-cased by this method)
            SURF_PARC: cultivated area in hectares
            CODE_CULTURE: code of the culture
            LIBELLE_CULTURE: name of the culture
            LIBELLE_GROUPE_CULTURE: name of the culture group
            ACTIF: "oui" or "non"

        Args:
            region: Name of a region in metropolitan France.

        Returns:
            At most ten rows for the requested region, largest ``SURF_PARC``
            first. The frame is empty when no row matches the region.
        """
        df_rpg = pd.read_csv("data/data_rpg/data_prepared_rpg.csv")

        # The column itself is lower-cased (not just the comparison key) so
        # the returned REGION values keep the form callers already receive.
        df_rpg["REGION"] = df_rpg["REGION"].str.lower()

        # Tool arguments come from an agent and may carry stray whitespace;
        # strip both sides so " Bretagne " still matches "bretagne".
        wanted_region = region.strip().lower()
        in_region = df_rpg["REGION"].str.strip() == wanted_region

        # Keep only the ten largest cultivated areas for that region.
        ranked = df_rpg[in_region].sort_values(by="SURF_PARC", ascending=False)
        return ranked.head(10)

class MeteoTool(BaseTool):
    name: str = "meteo_tool"
    description: str = "Extraite les données météorologiques des dix dernières années pour la région identifiée en utilisant des localisations GPS du centre de la région"

    def _run(self, latitude: float, longitude: float) -> pd.DataFrame:
        """Fetch daily archive weather for a point and aggregate it per month.

        Queries the Open-Meteo archive API for the fixed period 2015-01-01 to
        2024-12-31 (the window is hard-coded, not relative to today), then
        collapses the daily series into one row per calendar month.

        Columns of the returned DataFrame:
            year_month: year and month formatted as "YYYYMM"
            year: calendar year
            month: calendar month number
            temperature_2m_max: monthly mean of the daily maximum temperature
            temperature_2m_min: monthly mean of the daily minimum temperature
            precipitation_sum: monthly total of the daily precipitation sums
            sunchine_duration: monthly total sunshine duration in hours
                (the misspelled column name is kept because consumers may
                already rely on it)

        Temperatures and precipitation are in the units returned by the API;
        the original author documents them as degrees Celsius and millimetres.

        Args:
            latitude: Latitude of the centre of the region.
            longitude: Longitude of the centre of the region.

        Returns:
            Monthly aggregated weather data, sorted by the grouping keys.
        """
        # HTTP session with an on-disk cache (entries expire after 300 s) and
        # automatic retries with backoff on failure.
        cache_session = requests_cache.CachedSession('.cache', expire_after = 300)
        retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
        openmeteo = openmeteo_requests.Client(session = retry_session)

        # Single source of truth for the requested variables: the response
        # exposes them by position, in the same order as requested.
        daily_variables = ["temperature_2m_max", "temperature_2m_min", "sunshine_duration", "precipitation_sum"]
        url = "https://archive-api.open-meteo.com/v1/archive"
        params = {
            "latitude": latitude,
            "longitude": longitude,
            "start_date": "2015-01-01",
            "end_date": "2024-12-31",
            "daily": daily_variables,
        }
        responses = openmeteo.weather_api(url, params=params)

        # Only one location is requested, so only the first response is used.
        response = responses[0]
        print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
        print(f"Elevation {response.Elevation()} m asl")
        print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
        print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

        daily = response.Daily()

        # Rebuild the daily timestamps from the start, end and step reported
        # by the API; the end bound is exclusive.
        daily_data = {"date": pd.date_range(
            start = pd.to_datetime(daily.Time(), unit = "s", utc = True),
            end = pd.to_datetime(daily.TimeEnd(), unit = "s", utc = True),
            freq = pd.Timedelta(seconds = daily.Interval()),
            inclusive = "left"
        )}
        for position, variable in enumerate(daily_variables):
            daily_data[variable] = daily.Variables(position).ValuesAsNumpy()

        daily_dataframe = pd.DataFrame(data = daily_data)
        # Sunshine duration is converted from seconds to hours.
        daily_dataframe["sunshine_duration"] = daily_dataframe["sunshine_duration"] / 3600

        # Derive the grouping keys with vectorised datetime accessors rather
        # than a row-wise apply (faster, and also valid on an empty frame).
        dates = daily_dataframe["date"]
        daily_dataframe["month"] = dates.dt.month
        daily_dataframe["year"] = dates.dt.year
        daily_dataframe["year_month"] = dates.dt.strftime("%Y%m")

        # Aggregate to monthly: temperatures are averaged, precipitation and
        # sunshine are summed.
        monthly_dataframe = daily_dataframe.groupby(["year_month", "year", "month"]).agg(
            temperature_2m_max = ("temperature_2m_max", "mean"),
            temperature_2m_min = ("temperature_2m_min", "mean"),
            precipitation_sum = ("precipitation_sum", "sum"),
            sunchine_duration = ("sunshine_duration", "sum")
        ).reset_index()
        return monthly_dataframe