# RiverRadar / pages / HF_Dataset.py
# bsenst's picture
# add trend data visualization
# 0dff473
import streamlit as st
import pandas as pd
from huggingface_hub import hf_hub_download
import os
import pydeck as pdk
from datetime import datetime
# Render the page using the full browser width.
st.set_page_config(layout="wide")

# Environment variable that holds the Hugging Face access token, and the
# dataset repository the parquet files are fetched from.
HF_TOKEN_ENV_VAR = "pegelonline_dataset_read_only"
DATASET_REPO_ID = "DSSG-Wasserwacht/pegelonline-dataset"

hf_token = os.getenv(HF_TOKEN_ENV_VAR)
if not hf_token:
    # Report the variable that is actually read (the old message pointed at
    # HF_TOKEN, which this page never looks at). An empty value is treated
    # like a missing one because it cannot authenticate either.
    st.error(
        f"Hugging Face token not found. Please set the {HF_TOKEN_ENV_VAR} environment variable."
    )
    # Halt this script run so nothing below executes without a token.
    st.stop()


def _download_dataset_file(filename):
    """Download one file from the dataset repository.

    Args:
        filename: Path of the file inside the repository.

    Returns:
        The local path of the downloaded file, as returned by
        ``hf_hub_download``.
    """
    return hf_hub_download(
        repo_id=DATASET_REPO_ID,
        filename=filename,
        repo_type="dataset",
        # `token` is the documented argument; `use_auth_token` is its
        # legacy spelling.
        token=hf_token,
    )


# Download the dataset files
dataset_path_stations = _download_dataset_file("processed/stations.parquet")
dataset_path_water_level = _download_dataset_file("processed/current_water_level.parquet")
dataset_path_timeseries = _download_dataset_file("processed/timeseries.parquet")
# Load the stations file and the current water level file.
df_stations = pd.read_parquet(dataset_path_stations)
df_water_level = pd.read_parquet(dataset_path_water_level)

# Reduce the time series to one rounded mean of "value" per uuid.
# The column is selected explicitly: `mean("value")` handed the string to the
# `numeric_only` flag, so every numeric column was averaged and then merged
# into the final frame instead of just "value".
df_timeseries = (
    pd.read_parquet(dataset_path_timeseries)
    .groupby("uuid")["value"]
    .mean()
    .round(1)
    .reset_index()
    .rename(columns={"value": "mean_value"})
)

# Left joins keep every station, even one without a current reading or
# without any time series (those columns are then missing for that row).
df = df_stations.merge(df_water_level, how="left", on="uuid")
df = df.merge(df_timeseries, how="left", on="uuid")
# Render each timestamp as "YYYY-MM-DD HH:MM:SS"; missing values stay None.
def _format_timestamp(raw):
    """Reformat an ISO-format timestamp string, or return None if it is missing."""
    if pd.isna(raw):
        return None
    return datetime.fromisoformat(raw).strftime("%Y-%m-%d %H:%M:%S")


df["formatted_timestamp"] = df["timestamp"].apply(_format_timestamp)
# Add arrows based on the result of mean_value - value
def add_arrow(row):
    """Return a trend arrow comparing the current value with the mean value.

    Args:
        row: Mapping-like record (for example a DataFrame row) providing the
            keys "value" and "mean_value".

    Returns:
        "➡️" when the value lies within 1 % of the mean, "⬇️" when it is
        below the mean, "⬆️" when it is above the mean, or None when either
        number is missing.
    """
    if pd.isna(row["value"]):
        return None
    mean_value = row["mean_value"]
    # Without a mean there is nothing to compare against. Previously the NaN
    # comparisons were all False and the function fell through to "⬆️".
    if pd.isna(mean_value):
        return None
    difference = mean_value - row["value"]
    # abs() keeps the tolerance band non-negative; with a negative mean the
    # old band was negative, so "➡️" could never be returned.
    if abs(difference) <= 0.01 * abs(mean_value):
        return "➡️"
    if difference > 0:
        return "⬇️"
    return "⬆️"
# Evaluate add_arrow once per row and store the result as the "arrow" column.
arrow_column = df.apply(add_arrow, axis=1)
df["arrow"] = arrow_column
# Add traffic light column
def add_traffic_light(row):
    """Classify how far the current value deviates from the mean value.

    Args:
        row: Mapping-like record (for example a DataFrame row) providing the
            keys "value" and "mean_value".

    Returns:
        "🔴" when the absolute deviation exceeds 15 % of the mean, "🟡" when
        it exceeds 10 %, "🟢" otherwise, or None when either number is
        missing.
    """
    if pd.isna(row["value"]):
        return None
    mean_value = row["mean_value"]
    # Without a mean no deviation can be judged. Previously the NaN
    # comparisons were all False and the row was reported as "🟢".
    if pd.isna(mean_value):
        return None
    difference = abs(mean_value - row["value"])
    # Thresholds are taken from the magnitude of the mean; with a negative
    # mean the old thresholds were negative and every row became "🔴".
    reference = abs(mean_value)
    if difference > 0.15 * reference:
        return "🔴"
    if difference > 0.10 * reference:
        return "🟡"
    return "🟢"
# Evaluate add_traffic_light once per row and store the symbol per row.
traffic_light_column = df.apply(add_traffic_light, axis=1)
df["traffic_light"] = traffic_light_column

# Marker colour for each traffic-light symbol; every entry shares the same
# fourth component. Rows without a symbol (None) are drawn grey.
_MARKER_ALPHA = 140
color_mapping = {
    "🟢": [0, 255, 0, _MARKER_ALPHA],  # green
    "🟡": [255, 255, 0, _MARKER_ALPHA],  # yellow
    "🔴": [255, 0, 0, _MARKER_ALPHA],  # red
    None: [128, 128, 128, _MARKER_ALPHA],  # grey
}

# Translate the symbol column into the colour column used by the map layer.
df["color"] = df["traffic_light"].map(color_mapping)
# Page heading and short description
st.title("Pegelonline Dataset Viewer")
st.write("This app displays data from the Pegelonline dataset.")

# Tooltip text; the {placeholders} name columns of df.
marker_tooltip = {
    "text": "{shortname}, {value} cm, {mean_value} cm,\n {arrow} {traffic_light} {formatted_timestamp}"
}

# Start the map centred on the mean station coordinates.
center_latitude = df["latitude"].mean()
center_longitude = df["longitude"].mean()
view_state = pdk.ViewState(
    latitude=center_latitude,
    longitude=center_longitude,
    zoom=6,
    pitch=0,
)

# PyDeck layer for the map display: one pickable marker per row, coloured by
# the "color" column.
layer = pdk.Layer(
    "ScatterplotLayer",
    data=df,
    get_position=["longitude", "latitude"],
    get_radius=2000,
    get_color="color",
    pickable=True,
)

# Assemble the deck and hand it to Streamlit for rendering.
r = pdk.Deck(
    layers=[layer],
    initial_view_state=view_state,
    tooltip=marker_tooltip,
)
st.pydeck_chart(r)
# Show the raw data below the map
st.write("### Rohdaten der Pegelstationen")

# Columns shown in the table, in display order.
table_columns = [
    "shortname",
    "km",
    "value",
    "mean_value",
    "arrow",
    "traffic_light",
    "formatted_timestamp",
]
st.dataframe(df[table_columns])