Jon Solow committed on
Commit
e16d433
·
1 Parent(s): e2b52d0

Add streamlit filter and make some lint adjustments

Browse files
Files changed (2) hide show
  1. src/streamlit_app.py +3 -2
  2. src/streamlit_filter.py +87 -0
src/streamlit_app.py CHANGED
@@ -1,6 +1,7 @@
1
  import os
2
  import streamlit as st
3
  import pandas as pd
 
4
 
5
 
6
  LEAGUE_NAME = "LOFG"
@@ -32,8 +33,8 @@ def get_app():
32
  "eligible",
33
  ]
34
 
35
- st.subheader("Raw data")
36
- st.write(data[raw_display_columns])
37
 
38
 
39
  if __name__ == "__main__":
 
1
  import os
2
  import streamlit as st
3
  import pandas as pd
4
+ from streamlit_filter import filter_dataframe
5
 
6
 
7
  LEAGUE_NAME = "LOFG"
 
33
  "eligible",
34
  ]
35
 
36
+ filtered_data = filter_dataframe(data[raw_display_columns])
37
+ st.write(filtered_data)
38
 
39
 
40
  if __name__ == "__main__":
src/streamlit_filter.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # https://blog.streamlit.io/auto-generate-a-dataframe-filtering-ui-in-streamlit-with-filter_dataframe/
2
+
3
+ from pandas.api.types import (
4
+ is_categorical_dtype,
5
+ is_datetime64_any_dtype,
6
+ is_numeric_dtype,
7
+ is_object_dtype,
8
+ )
9
+ import pandas as pd
10
+ import streamlit as st
11
+
12
+
13
def filter_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """
    Adds a UI on top of a dataframe to let viewers filter columns

    Renders an "Add filters" checkbox. While it is unchecked the input
    dataframe is returned untouched. Once checked, the viewer picks the
    columns to filter on and a widget is rendered per picked column:

    * categorical dtype, or fewer than 17 unique values -> multiselect
    * numeric -> range slider
    * datetime -> date range picker (both days inclusive)
    * anything else -> text box matched as a regex, or as a literal
      substring when the text is not a valid regex

    Filters are applied cumulatively in the order the columns were picked.

    Args:
        df (pd.DataFrame): Original dataframe

    Returns:
        pd.DataFrame: Filtered dataframe. This is ``df`` itself when filtering
        is disabled, otherwise a filtered copy in which object columns that
        parse as dates have been converted to timezone-naive datetimes.
    """
    # Local import: only needed to validate the viewer-supplied regex below.
    import re

    modify = st.checkbox("Add filters")

    if not modify:
        return df

    # Work on a copy so the dtype conversions below never leak to the caller.
    df = df.copy()

    # Try to convert datetimes into a standard format (datetime, no timezone)
    for col in df.columns:
        if is_object_dtype(df[col]):
            try:
                df[col] = pd.to_datetime(df[col])
            except Exception:
                # Deliberate best effort: a column that does not parse as
                # dates is simply left as it was.
                pass

        if is_datetime64_any_dtype(df[col]):
            df[col] = df[col].dt.tz_localize(None)

    modification_container = st.container()

    with modification_container:
        to_filter_columns = st.multiselect("Filter dataframe on", df.columns)
        for column in to_filter_columns:
            # The narrow left column only serves as an indent for the widget.
            _, right = st.columns((1, 20))
            # isinstance check replaces the deprecated is_categorical_dtype().
            is_categorical = isinstance(df[column].dtype, pd.CategoricalDtype)
            # Treat columns with < 17 unique values as categorical
            if is_categorical or df[column].nunique() < 17:
                user_cat_input = right.multiselect(
                    f"Values for {column}",
                    df[column].unique(),
                    default=list(df[column].unique()),
                )
                df = df[df[column].isin(user_cat_input)]
            elif is_numeric_dtype(df[column]):
                # Reaching this branch means >= 17 distinct values, so
                # _max > _min and the slider step is strictly positive.
                _min = float(df[column].min())
                _max = float(df[column].max())
                step = (_max - _min) / 100
                user_num_input = right.slider(
                    f"Values for {column}",
                    min_value=_min,
                    max_value=_max,
                    value=(_min, _max),
                    step=step,
                )
                df = df[df[column].between(*user_num_input)]
            elif is_datetime64_any_dtype(df[column]):
                user_date_input = right.date_input(
                    f"Values for {column}",
                    value=(
                        df[column].min(),
                        df[column].max(),
                    ),
                )
                # While the viewer is still picking the range the widget
                # yields fewer than two dates; filter only on a full range.
                if isinstance(user_date_input, tuple) and len(user_date_input) == 2:
                    start_date, end_date = map(pd.to_datetime, user_date_input)
                    # The picker returns plain dates (midnight once converted).
                    # Use an exclusive bound one day later so rows on the end
                    # day that carry a time-of-day component are kept.
                    end_exclusive = end_date + pd.Timedelta(days=1)
                    in_range = (df[column] >= start_date) & (df[column] < end_exclusive)
                    df = df.loc[in_range]
            else:
                user_text_input = right.text_input(
                    f"Substring or regex in {column}",
                )
                if user_text_input:
                    # A half-typed or invalid pattern (e.g. "(") must not
                    # crash the app: fall back to a literal substring match.
                    try:
                        re.compile(user_text_input)
                        use_regex = True
                    except re.error:
                        use_regex = False
                    text_values = df[column].astype(str)
                    df = df[text_values.str.contains(user_text_input, regex=use_regex)]

    return df