"""Streamlit viewer for the Pegelonline dataset.

Downloads the processed station, current-water-level and timeseries parquet
files from the Hugging Face Hub, compares each station's current value with
its mean value from the timeseries, and renders the result as a pydeck map
plus a table.
"""

import os
from datetime import datetime

import pandas as pd
import pydeck as pdk
import streamlit as st
from huggingface_hub import hf_hub_download

# Name of the environment variable that holds the Hugging Face access token.
HF_TOKEN_ENV_VAR = "pegelonline_dataset_read_only"

# Hugging Face dataset repository the parquet files are downloaded from.
DATASET_REPO_ID = "DSSG-Wasserwacht/pegelonline-dataset"

# Set the Streamlit layout to wide
st.set_page_config(layout="wide")


def download_dataset_file(filename, token):
    """Download one file of the dataset repository and return its local path.

    Args:
        filename: Path of the file inside the dataset repository.
        token: Hugging Face access token used for authentication.

    Returns:
        The local filesystem path returned by ``hf_hub_download``.
    """
    return hf_hub_download(
        repo_id=DATASET_REPO_ID,
        filename=filename,
        repo_type="dataset",
        # ``token`` replaces the deprecated ``use_auth_token`` keyword.
        token=token,
    )


def format_timestamp(raw):
    """Format an ISO-8601 timestamp string as ``YYYY-MM-DD HH:MM:SS``.

    Returns ``None`` when ``raw`` is missing (NaN/None).
    """
    if pd.isna(raw):
        return None
    return datetime.fromisoformat(raw).strftime("%Y-%m-%d %H:%M:%S")


def add_arrow(row):
    """Return a trend arrow comparing ``row["value"]`` with ``row["mean_value"]``.

    Returns:
        "➡️" when the value is within 1 % of the mean, "⬇️" when it is below
        the mean, "⬆️" when it is above, and ``None`` when either the value
        or the mean is missing.
    """
    value = row["value"]
    mean_value = row["mean_value"]
    # Without both numbers no comparison is possible; NaN comparisons would
    # otherwise all be False and fall through to the last branch.
    if pd.isna(value) or pd.isna(mean_value):
        return None
    difference = mean_value - value
    # abs() keeps the tolerance non-negative when the mean itself is negative.
    if abs(difference) <= 0.01 * abs(mean_value):
        return "➡️"
    if difference > 0:
        return "⬇️"
    return "⬆️"


def add_traffic_light(row):
    """Return a traffic-light symbol for the deviation of value from mean.

    Returns:
        "🔴" when the absolute deviation exceeds 15 % of the mean, "🟡" when
        it exceeds 10 %, "🟢" otherwise, and ``None`` when either the value
        or the mean is missing.
    """
    value = row["value"]
    mean_value = row["mean_value"]
    if pd.isna(value) or pd.isna(mean_value):
        return None
    deviation = abs(mean_value - value)
    # abs() keeps the thresholds non-negative when the mean itself is negative.
    reference = abs(mean_value)
    if deviation > 0.15 * reference:
        return "🔴"
    if deviation > 0.10 * reference:
        return "🟡"
    return "🟢"


# Read the Hugging Face token from the environment
hf_token = os.getenv(HF_TOKEN_ENV_VAR)

if hf_token is None:
    # Name the variable that is actually read so the message is actionable.
    st.error(
        f"Hugging Face token not found. Please set the {HF_TOKEN_ENV_VAR} "
        "environment variable."
    )
else:
    # Download the dataset files
    dataset_path_stations = download_dataset_file(
        "processed/stations.parquet", hf_token
    )
    dataset_path_water_level = download_dataset_file(
        "processed/current_water_level.parquet", hf_token
    )
    dataset_path_timeseries = download_dataset_file(
        "processed/timeseries.parquet", hf_token
    )

    # Load the datasets
    df_stations = pd.read_parquet(dataset_path_stations)
    df_water_level = pd.read_parquet(dataset_path_water_level)
    # Mean of "value" per station, rounded to one decimal. The column is
    # selected explicitly so that no other column of the timeseries leaks
    # into the merge below.
    df_timeseries = (
        pd.read_parquet(dataset_path_timeseries)
        .groupby("uuid")["value"]
        .mean()
        .round(1)
        .rename("mean_value")
        .reset_index()
    )

    # Left joins keep every station, even without a current value or a mean.
    df = df_stations.merge(df_water_level, how="left", on="uuid")
    df = df.merge(df_timeseries, how="left", on="uuid")

    # Format the timestamp nicely
    df["formatted_timestamp"] = df["timestamp"].apply(format_timestamp)

    # Add arrows based on the result of mean_value - value
    df["arrow"] = df.apply(add_arrow, axis=1)

    # Add traffic light column
    df["traffic_light"] = df.apply(add_traffic_light, axis=1)

    # Define a color mapping (RGBA) for the traffic lights
    color_mapping = {
        "🟢": [0, 255, 0, 140],  # Green
        "🟡": [255, 255, 0, 140],  # Yellow
        "🔴": [255, 0, 0, 140],  # Red
        None: [128, 128, 128, 140],  # Grey for None
    }

    # Map the traffic_light column to colors
    df["color"] = df["traffic_light"].map(color_mapping)

    # Streamlit app
    st.title("Pegelonline Dataset Viewer")
    st.write("This app displays data from the Pegelonline dataset.")

    # PyDeck layer for the map display
    layer = pdk.Layer(
        "ScatterplotLayer",
        data=df,
        get_position=["longitude", "latitude"],
        get_radius=2000,
        get_color="color",
        pickable=True,
    )

    # Deck.gl map, centred on the mean station coordinates
    view_state = pdk.ViewState(
        latitude=df["latitude"].mean(),
        longitude=df["longitude"].mean(),
        zoom=6,
        pitch=0,
    )

    r = pdk.Deck(
        layers=[layer],
        initial_view_state=view_state,
        tooltip={
            "text": "{shortname}, {value} cm, {mean_value} cm,\n {arrow} {traffic_light} {formatted_timestamp}"
        },
    )

    st.pydeck_chart(r)

    # Show the raw data
    st.write("### Rohdaten der Pegelstationen")

    # Display the dataframe
    st.dataframe(
        df[
            [
                "shortname",
                "km",
                "value",
                "mean_value",
                "arrow",
                "traffic_light",
                "formatted_timestamp",
            ]
        ]
    )