# Hugging Face Spaces status: Running
import streamlit as st | |
import pandas as pd | |
from huggingface_hub import hf_hub_download | |
import os | |
import pydeck as pdk | |
from datetime import datetime | |
# Set the Streamlit layout to wide
st.set_page_config(layout="wide")

# Environment variable that holds the Hugging Face access token, and the
# dataset repository all files are fetched from.
HF_TOKEN_ENV_VAR = "pegelonline_dataset_read_only"
HF_DATASET_REPO = "DSSG-Wasserwacht/pegelonline-dataset"

hf_token = os.getenv(HF_TOKEN_ENV_VAR)
if not hf_token:
    # Name the variable that is actually read; the previous message pointed
    # at HF_TOKEN, which this script never looks at. An empty value is
    # treated like a missing one.
    st.error(
        f"Hugging Face token not found. Please set the {HF_TOKEN_ENV_VAR} environment variable."
    )
    # Halt this script run so nothing below executes without credentials.
    st.stop()


def _download_dataset_file(filename):
    """Fetch one file from the dataset repository.

    Args:
        filename: Path of the file inside the dataset repository.

    Returns:
        Whatever ``hf_hub_download`` returns for that file (the path of the
        locally available copy).
    """
    return hf_hub_download(
        repo_id=HF_DATASET_REPO,
        filename=filename,
        repo_type="dataset",
        # NOTE(review): ``token`` is the replacement for the deprecated
        # ``use_auth_token`` keyword -- confirm against the pinned
        # huggingface_hub version.
        token=hf_token,
    )


# Download the dataset files
dataset_path_stations = _download_dataset_file("processed/stations.parquet")
dataset_path_water_level = _download_dataset_file("processed/current_water_level.parquet")
dataset_path_timeseries = _download_dataset_file("processed/timeseries.parquet")
# Load the dataset
df_stations = pd.read_parquet(dataset_path_stations)
df_water_level = pd.read_parquet(dataset_path_water_level)

# Mean of "value" per uuid, rounded to one decimal and exposed as "mean_value".
# The column is selected explicitly: the first positional parameter of
# GroupBy.mean() is ``numeric_only``, so the former ``.mean("value")`` did not
# pick a column but averaged every numeric column of the time series, any of
# which would then have been merged into ``df`` as well.
df_timeseries = (
    pd.read_parquet(dataset_path_timeseries)
    .groupby("uuid")["value"]
    .mean()
    .round(1)
    .rename("mean_value")
    .reset_index()
)

# Left joins keep every row of the stations table, even when no current
# water level or time-series mean exists for its uuid.
df = df_stations.merge(df_water_level, how="left", on="uuid")
df = df.merge(df_timeseries, how="left", on="uuid")
# Format the timestamp nicely
def _format_timestamp(raw):
    """Render an ISO-8601 timestamp string as ``YYYY-MM-DD HH:MM:SS``.

    Missing values (as detected by ``pd.isna``) are passed through as None.
    """
    if pd.isna(raw):
        return None
    parsed = datetime.fromisoformat(raw)
    return parsed.strftime("%Y-%m-%d %H:%M:%S")


df["formatted_timestamp"] = df["timestamp"].apply(_format_timestamp)
# Add arrows based on the result of mean_value - value
def add_arrow(row, tolerance=0.01):
    """Return an arrow symbol comparing the current value with the mean.

    Args:
        row: Mapping-like record (for example a DataFrame row) providing the
            keys "value" and "mean_value".
        tolerance: Fraction of the mean's magnitude within which the value
            counts as unchanged.

    Returns:
        "➡️" if the value lies within the tolerance band around the mean,
        "⬇️" if it is below the mean, "⬆️" if it is above the mean, or None
        when either number is missing.
    """
    value = row["value"]
    if pd.isna(value):
        return None
    mean_value = row["mean_value"]
    # Without a mean there is nothing to compare against. Every comparison
    # with NaN is False, so the old code fell through to the "up" arrow.
    if pd.isna(mean_value):
        return None
    difference = mean_value - value
    # Scale the band by the magnitude of the mean: a negative mean would
    # otherwise give a negative tolerance that can never be satisfied.
    if abs(difference) <= tolerance * abs(mean_value):
        return "➡️"
    if difference > 0:
        return "⬇️"
    return "⬆️"
# Evaluate add_arrow for every row and store the result as the "arrow" column.
arrow_per_row = df.apply(add_arrow, axis=1)
df["arrow"] = arrow_per_row
# Add traffic light column
def add_traffic_light(row, yellow_ratio=0.10, red_ratio=0.15):
    """Classify how far the current value deviates from the mean.

    Args:
        row: Mapping-like record (for example a DataFrame row) providing the
            keys "value" and "mean_value".
        yellow_ratio: Deviation, as a fraction of the mean's magnitude, above
            which the result is yellow.
        red_ratio: Deviation, as a fraction of the mean's magnitude, above
            which the result is red.

    Returns:
        "🔴", "🟡" or "🟢" depending on the absolute deviation from the mean,
        or None when either number is missing.
    """
    value = row["value"]
    if pd.isna(value):
        return None
    mean_value = row["mean_value"]
    # A missing mean must not be reported as green: comparisons with NaN are
    # all False, which previously dropped straight into the "green" branch.
    if pd.isna(mean_value):
        return None
    deviation = abs(mean_value - value)
    # Thresholds are relative to the magnitude of the mean; using the signed
    # mean made every reading red as soon as the mean was negative.
    scale = abs(mean_value)
    if deviation > red_ratio * scale:
        return "🔴"
    if deviation > yellow_ratio * scale:
        return "🟡"
    return "🟢"
# Evaluate add_traffic_light for every row and store the symbol per row.
traffic_light_per_row = df.apply(add_traffic_light, axis=1)
df["traffic_light"] = traffic_light_per_row

# Marker colours as [R, G, B, A] lists keyed by traffic-light symbol.
# The None key provides a grey marker for rows without a symbol.
_MARKER_ALPHA = 140
color_mapping = {
    symbol: [red, green, blue, _MARKER_ALPHA]
    for symbol, (red, green, blue) in (
        ("🟢", (0, 255, 0)),  # Green
        ("🟡", (255, 255, 0)),  # Yellow
        ("🔴", (255, 0, 0)),  # Red
        (None, (128, 128, 128)),  # Grey for None
    )
}

# Translate each traffic-light symbol into its colour list.
df["color"] = df["traffic_light"].map(color_mapping)
# Streamlit app: page heading and short description
st.title("Pegelonline Dataset Viewer")
st.write("This app displays data from the Pegelonline dataset.")

# PyDeck layer for the map display: one marker per row of df, positioned by
# its longitude/latitude columns and coloured by its "color" column.
scatter_options = {
    "data": df,
    "get_position": ["longitude", "latitude"],
    "get_radius": 2000,
    "get_color": "color",
    "pickable": True,
}
layer = pdk.Layer("ScatterplotLayer", **scatter_options)

# Deck.gl map: start centred on the mean coordinate of all rows.
center_latitude = df["latitude"].mean()
center_longitude = df["longitude"].mean()
view_state = pdk.ViewState(
    latitude=center_latitude,
    longitude=center_longitude,
    zoom=6,
    pitch=0,
)

# Tooltip template; the {placeholders} refer to columns of df.
tooltip_template = {
    "text": "{shortname}, {value} cm, {mean_value} cm,\n {arrow} {traffic_light} {formatted_timestamp}"
}
r = pdk.Deck(
    layers=[layer],
    initial_view_state=view_state,
    tooltip=tooltip_template,
)
st.pydeck_chart(r)

# Show the raw data
st.write("### Rohdaten der Pegelstationen")

# Display the selected columns of the dataframe as a table
table_columns = [
    "shortname",
    "km",
    "value",
    "mean_value",
    "arrow",
    "traffic_light",
    "formatted_timestamp",
]
st.dataframe(df[table_columns])