Spaces:

hackathon-ombrea
/

gaia

Runtime error

gaia / chatbot_gaia /src /tools /custom_tool.py

Hugo Massonnat

move rpg data in the data folder

e76e66b 4 months ago

6.22 kB

	from typing import Type

	from crewai.tools import BaseTool
	from pydantic import BaseModel, Field

	import openmeteo_requests
	import requests_cache
	from retry_requests import retry

	import pandas as pd

	class MyCustomToolInput(BaseModel):
	    """Argument schema used by ``MyCustomTool``."""

	    # Single string argument; the Ellipsis default marks it as required.
	    argument: str = Field(default=..., description="Description of the argument.")


	class MyCustomTool(BaseTool):
	    """Placeholder tool from the project template; always returns a fixed string."""

	    # Tool name, description and input schema.
	    name: str = "Name of my tool"
	    description: str = "Clear description for what this tool is useful for, your agent will need this information to use it."
	    args_schema: Type[BaseModel] = MyCustomToolInput

	    def _run(self, argument: str) -> str:
	        """Ignore ``argument`` and return the template's example output."""
	        example_output = "this is an example of a tool output, ignore it and move along."
	        return example_output

	class rgp_cultures_regions(BaseTool):
	    """Tool returning the ten largest crops by cultivated area for a French region."""

	    name: str = "rgp_cultures_regions"
	    description: str = "Extraite les cultures les plus populaires en terme de surface cultuvé en hectare pour une région donnée en france métropolitaine"

	    def _run(self, region: str) -> pd.DataFrame:
	        """Return the ten rows with the largest ``SURF_PARC`` for ``region``.

	        The match against the ``REGION`` column is case-insensitive, and
	        leading/trailing whitespace in ``region`` is ignored.

	        Columns of the prepared CSV as listed by the original author (only
	        ``REGION`` and ``SURF_PARC`` are actually referenced by this code):
	            CODE_CULTU: code of the culture
	            REGION: name of the region
	            SURF_PARC: cultivated area in hectares
	            CODE_CULTURE: code of the culture
	            LIBELLE_CULTURE: name of the culture
	            LIBELLE_GROUPE_CULTURE: name of the culture group
	            ACTIF: oui or non

	        Args:
	            region: Name of a region in metropolitan France.

	        Returns:
	            At most ten rows of the prepared table, sorted by ``SURF_PARC``
	            in descending order, with ``REGION`` lower-cased. The frame is
	            empty when no row matches ``region``.
	        """
	        # The path is relative to the process working directory.
	        df_rpg = pd.read_csv("data/data_rpg/data_prepared_rpg.csv")

	        # Normalise both sides of the comparison. Stripping the input avoids a
	        # silent empty result when the caller passes e.g. " Bretagne ".
	        df_rpg["REGION"] = df_rpg["REGION"].str.lower()
	        region = region.strip().lower()
	        df_rpg = df_rpg[df_rpg["REGION"] == region]

	        # Keep only the ten rows with the largest cultivated area.
	        return df_rpg.sort_values(by="SURF_PARC", ascending=False).head(10)

	class MeteoTool(BaseTool):
	    """Tool fetching daily Open-Meteo archive data and aggregating it by month."""

	    name: str = "meteo_tool"
	    description: str = "Extraite les données météorologiques des dix dernières années pour la région identifiée en utilisant des localisations GPS du centre de la région"

	    def _run(self, latitude: float, longitude: float) -> pd.DataFrame:
	        """Return monthly weather aggregates for one GPS location.

	        Daily values are requested from the Open-Meteo archive API for the
	        fixed window 2015-01-01 to 2024-12-31 and then grouped by calendar
	        month. Location metadata from the response is printed to stdout.

	        Columns of the returned DataFrame:
	            year_month: year and month formatted as ``%Y%m`` (e.g. "201501")
	            year: calendar year
	            month: calendar month number
	            temperature_2m_max: mean of the daily maximum temperature
	            temperature_2m_min: mean of the daily minimum temperature
	            precipitation_sum: sum of the daily precipitation
	            sunchine_duration: sum of the daily sunshine duration, after
	                dividing the API value by 3600 (hours, assuming the API
	                reports seconds -- confirm against the Open-Meteo docs)

	        Temperature and precipitation units are whatever the API returns by
	        default, since no unit parameter is sent (presumably Celsius and mm).

	        Args:
	            latitude: Latitude of the centre of the region.
	            longitude: Longitude of the centre of the region.

	        Returns:
	            One row per (year_month, year, month) with the columns above.
	        """
	        # HTTP session with a local cache (5 minute expiry) and retries.
	        cache_session = requests_cache.CachedSession(".cache", expire_after=300)
	        retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
	        openmeteo = openmeteo_requests.Client(session=retry_session)

	        url = "https://archive-api.open-meteo.com/v1/archive"
	        params = {
	            "latitude": latitude,
	            "longitude": longitude,
	            "start_date": "2015-01-01",
	            "end_date": "2024-12-31",
	            "daily": ["temperature_2m_max", "temperature_2m_min", "sunshine_duration", "precipitation_sum"],
	        }
	        responses = openmeteo.weather_api(url, params=params)

	        # Only one location is requested, so only the first response is used.
	        response = responses[0]
	        print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
	        print(f"Elevation {response.Elevation()} m asl")
	        print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
	        print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

	        # Variables are read by position, so the indices below must follow the
	        # order of the "daily" list in the request.
	        daily = response.Daily()
	        dates = pd.date_range(
	            start=pd.to_datetime(daily.Time(), unit="s", utc=True),
	            end=pd.to_datetime(daily.TimeEnd(), unit="s", utc=True),
	            freq=pd.Timedelta(seconds=daily.Interval()),
	            inclusive="left",
	        )
	        daily_dataframe = pd.DataFrame(
	            data={
	                "date": dates,
	                "temperature_2m_max": daily.Variables(0).ValuesAsNumpy(),
	                "temperature_2m_min": daily.Variables(1).ValuesAsNumpy(),
	                "sunshine_duration": daily.Variables(2).ValuesAsNumpy(),
	                "precipitation_sum": daily.Variables(3).ValuesAsNumpy(),
	            }
	        )
	        daily_dataframe["sunshine_duration"] = daily_dataframe["sunshine_duration"] / 3600

	        # Derive the grouping keys. The vectorized ``.dt.strftime`` replaces a
	        # row-wise ``apply`` and yields the same "%Y%m" strings.
	        daily_dataframe["month"] = daily_dataframe["date"].dt.month
	        daily_dataframe["year"] = daily_dataframe["date"].dt.year
	        daily_dataframe["year_month"] = daily_dataframe["date"].dt.strftime("%Y%m")

	        # Aggregate daily values to one row per month. The misspelled
	        # "sunchine_duration" output column is kept on purpose: it is part of
	        # the schema consumers of this tool already receive.
	        monthly_dataframe = daily_dataframe.groupby(["year_month", "year", "month"]).agg(
	            temperature_2m_max=("temperature_2m_max", "mean"),
	            temperature_2m_min=("temperature_2m_min", "mean"),
	            precipitation_sum=("precipitation_sum", "sum"),
	            sunchine_duration=("sunshine_duration", "sum"),
	        ).reset_index()
	        return monthly_dataframe