File size: 3,984 Bytes
7f825e4
 
 
 
0dff473
 
 
 
 
7f825e4
 
 
 
 
 
 
 
0dff473
7f825e4
 
 
 
 
0dff473
 
 
 
 
 
 
 
 
 
 
 
7f825e4
 
0dff473
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7f825e4
 
 
 
 
0dff473
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7f825e4
0dff473
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import streamlit as st
import pandas as pd
from huggingface_hub import hf_hub_download
import os
import pydeck as pdk
from datetime import datetime

# Set the Streamlit layout to wide
st.set_page_config(layout="wide")

# Name of the environment variable this app reads the Hugging Face access
# token from. Kept in one place so the error message below cannot drift
# away from the variable that is actually looked up.
HF_TOKEN_ENV_VAR = "pegelonline_dataset_read_only"
hf_token = os.getenv(HF_TOKEN_ENV_VAR)

if hf_token is None:
    st.error(
        f"Hugging Face token not found. Please set the {HF_TOKEN_ENV_VAR} environment variable."
    )
else:
    def _download_parquet(filename):
        """Download one file of the dataset repository and return its local path."""
        return hf_hub_download(
            repo_id="DSSG-Wasserwacht/pegelonline-dataset",
            filename=filename,
            repo_type="dataset",
            # `token` is the current name of the deprecated `use_auth_token`.
            token=hf_token,
        )

    # Download the dataset files
    dataset_path_stations = _download_parquet("processed/stations.parquet")
    dataset_path_water_level = _download_parquet("processed/current_water_level.parquet")
    dataset_path_timeseries = _download_parquet("processed/timeseries.parquet")

    # Load the dataset
    df_stations = pd.read_parquet(dataset_path_stations)
    df_water_level = pd.read_parquet(dataset_path_water_level)

    # Mean of the "value" column per station uuid, rounded to one decimal.
    # Select the column explicitly: the first positional argument of
    # GroupBy.mean() is `numeric_only`, not a column name.
    df_timeseries = (
        pd.read_parquet(dataset_path_timeseries)
        .groupby("uuid")["value"]
        .mean()
        .round(1)
        .rename("mean_value")
        .reset_index()
    )

    # Left joins keep every station, even without a current value or a mean.
    df = df_stations.merge(df_water_level, how="left", on="uuid")
    df = df.merge(df_timeseries, how="left", on="uuid")

    # Render each timestamp as "YYYY-MM-DD HH:MM:SS"; missing entries stay None
    def _format_timestamp(raw):
        """Parse an ISO-format timestamp and return it in display format."""
        if pd.isna(raw):
            return None
        return datetime.fromisoformat(raw).strftime("%Y-%m-%d %H:%M:%S")

    df["formatted_timestamp"] = df["timestamp"].apply(_format_timestamp)

    # Add arrows based on the result of mean_value - value
    def add_arrow(row):
        """Return a trend arrow comparing the current value with its mean.

        Args:
            row: Record supporting ``row["value"]`` and ``row["mean_value"]``.

        Returns:
            "➡️" if the value lies within 1 % of the mean, "⬇️" if it is
            below the mean, "⬆️" if it is above, or None when either
            number is missing.
        """
        current = row["value"]
        mean = row["mean_value"]
        # Without both numbers no comparison is possible; a NaN mean would
        # otherwise fail every test below and be reported as "⬆️".
        if pd.isna(current) or pd.isna(mean):
            return None
        difference = mean - current
        # abs() keeps the tolerance band non-negative for negative means.
        if abs(difference) <= 0.01 * abs(mean):
            return "➡️"
        if difference > 0:
            return "⬇️"
        return "⬆️"

    # Call add_arrow once per row (axis=1) and store the results in "arrow".
    df["arrow"] = df.apply(add_arrow, axis=1)

    # Add traffic light column
    def add_traffic_light(row):
        """Classify how far the current value deviates from its mean.

        Args:
            row: Record supporting ``row["value"]`` and ``row["mean_value"]``.

        Returns:
            "🔴" for a deviation above 15 % of the mean, "🟡" above 10 %,
            "🟢" otherwise, or None when either number is missing.
        """
        current = row["value"]
        mean = row["mean_value"]
        # A NaN mean would fail both comparisons and be reported as "🟢".
        if pd.isna(current) or pd.isna(mean):
            return None
        difference = abs(mean - current)
        # abs() keeps the thresholds non-negative for negative means, which
        # would otherwise classify every such row as "🔴".
        reference = abs(mean)
        if difference > 0.15 * reference:
            return "🔴"
        if difference > 0.10 * reference:
            return "🟡"
        return "🟢"

    # Call add_traffic_light once per row (axis=1) and store the indicator.
    df["traffic_light"] = df.apply(add_traffic_light, axis=1)

    # Define a color mapping for the traffic lights (four-component lists
    # passed to the map layer as the per-row color).
    missing_color = [128, 128, 128, 140]  # Grey
    color_mapping = {
        "🟢": [0, 255, 0, 140],  # Green
        "🟡": [255, 255, 0, 140],  # Yellow
        "🔴": [255, 0, 0, 140],   # Red
        None: missing_color,  # Grey for None
    }

    # Map the traffic_light column to colors. An explicit lookup with a
    # fallback gives every row a color, including rows whose indicator is
    # None, NaN or any other value that is not a key of the mapping.
    df["color"] = df["traffic_light"].apply(
        lambda light: color_mapping.get(light, missing_color)
    )

    # Page heading and short description
    st.title("Pegelonline Dataset Viewer")
    st.write("This app displays data from the Pegelonline dataset.")

    # PyDeck layer for the map display: one pickable dot per row of df,
    # positioned by longitude/latitude and filled with the "color" column.
    station_layer = pdk.Layer(
        "ScatterplotLayer",
        data=df,
        get_position=["longitude", "latitude"],
        get_radius=2000,
        get_color="color",
        pickable=True,
    )

    # Deck.gl map, initially centred on the mean coordinates of all rows
    center_latitude = df["latitude"].mean()
    center_longitude = df["longitude"].mean()
    initial_view = pdk.ViewState(
        latitude=center_latitude,
        longitude=center_longitude,
        zoom=6,
        pitch=0,
    )

    # Tooltip template; the {placeholders} are column names of df
    tooltip_template = "{shortname}, {value} cm, {mean_value} cm,\n {arrow} {traffic_light} {formatted_timestamp}"
    deck = pdk.Deck(
        layers=[station_layer],
        initial_view_state=initial_view,
        tooltip={"text": tooltip_template},
    )
    st.pydeck_chart(deck)

    # Show the raw data
    st.write("### Rohdaten der Pegelstationen")

    # Display the dataframe, restricted to the columns of interest
    table_columns = [
        "shortname",
        "km",
        "value",
        "mean_value",
        "arrow",
        "traffic_light",
        "formatted_timestamp",
    ]
    st.dataframe(df[table_columns])