nielsr HF staff committed on
Commit
99a2513
•
1 Parent(s): b864a26

More improvements

Browse files
Files changed (1) hide show
  1. app.py +43 -44
app.py CHANGED
@@ -2,10 +2,8 @@ from datetime import datetime
2
 
3
  import streamlit as st
4
  import pandas as pd
5
- import numpy as np
6
  import matplotlib.pyplot as plt
7
 
8
- from datasets import Dataset
9
  from load_dataframe import get_data
10
 
11
 
@@ -49,8 +47,8 @@ def aggregated_data(df, aggregation_level="week"):
49
  st.pyplot(plt)
50
 
51
 
52
- def show_data_editor(df: pd.DataFrame, key: str):
53
- edited_df = st.data_editor(df,
54
  hide_index=True,
55
  column_order=("reached_out", "reached_out_link", "paper_page", "title", "github", "num_models", "num_datasets", "num_spaces"),
56
  column_config={"github": st.column_config.LinkColumn(),
@@ -59,47 +57,38 @@ def show_data_editor(df: pd.DataFrame, key: str):
59
  width=2000,
60
  key=key)
61
 
62
- # Check if the dataframe has been edited
63
- # TODO this is wrong
64
- # rather we should probably do a merge-join (overwriting the edited rows) and then save the new dataframe
65
- # if not edited_df.equals(df):
66
- # save_data(edited_df)
67
- # st.success("Changes saved successfully!")
68
 
69
 
70
- def save_data(df: pd.DataFrame):
71
- # load as HF dataset
72
- dataset = Dataset.from_pandas(df)
73
-
74
- raise NotImplementedError("To do")
75
-
76
-
77
- def display_data(df: pd.DataFrame):
78
- df['has_artifact'] = (df['num_models'] > 0) | (df['num_datasets'] > 0) | (df['num_spaces'] > 0)
79
- num_artifacts = df['has_artifact'].sum()
80
- percentage_of_at_least_one_artifact = num_artifacts / df.shape[0] if df.shape[0] > 0 else 0
81
  percentage_of_at_least_one_artifact = round(percentage_of_at_least_one_artifact * 100, 2)
82
 
83
- # add reached out and reached out link columns
84
- df['reached_out'] = [False for _ in range(df.shape[0])]
85
- df["reached_out_link"] = ["" for _ in range(df.shape[0])]
86
-
87
  st.markdown(f"""
88
  ## {percentage_of_at_least_one_artifact}% papers with at least one 🤗 artifact
89
 
90
- * Number of papers: {df.shape[0]}
91
- * Number of papers with a Github link: {df['github'].notnull().sum()}
92
  * Number of papers with at least one HF artifact: {num_artifacts}
93
  """)
94
 
95
  st.write("Papers with at least one artifact")
96
- show_data_editor(df[df['has_artifact']], key="papers_with_artifacts")
 
97
 
98
  st.write("Papers without artifacts")
99
- show_data_editor(df[~df['has_artifact']], key="papers_without_artifacts")
 
100
 
101
  st.write("Papers with a HF mention in README but no artifacts")
102
- show_data_editor(df[(df['hf_mention'] == 1) & (~df['has_artifact'])], key="papers_with_hf_mention_no_artifacts")
 
103
 
104
 
105
  def main():
@@ -109,31 +98,41 @@ def main():
109
  st.sidebar.title("Navigation")
110
  selection = st.sidebar.selectbox("Go to", ["Daily/weekly/monthly data", "Aggregated data"])
111
 
 
 
 
 
 
 
 
 
 
 
112
  if selection == "Daily/weekly/monthly data":
113
  # Button to select day, month or week
114
  # Add streamlit selectbox.
115
  view_level = st.selectbox(label="View data per day, week or month", options=["day", "week", "month"])
116
 
117
  if view_level == "day":
118
- # get the latest dataframe
119
- df = get_data()
120
-
121
  # make a button to select the day, defaulting to today
122
  day = st.date_input("Select day", value="today", format="DD/MM/YYYY")
123
  # convert to the day of a Pandas Timestamp
124
  day = pd.Timestamp(day)
125
 
 
 
 
126
  filtered_df = df[df.index.date == day.date()]
127
 
128
  st.write(f"Showing data for {day.day_name()} {day.strftime('%d/%m/%Y')}")
129
- display_data(df=filtered_df)
130
 
131
- elif view_level == "week":
132
- # get the latest dataframe
133
- df = get_data()
134
-
135
  # make a button to select the week
136
  week_number = st.number_input("Select week", value=datetime.today().isocalendar()[1], min_value=1, max_value=52)
 
 
 
137
 
138
  # Extract week number from the index
139
  df['week'] = df.index.isocalendar().week
@@ -143,15 +142,15 @@ def main():
143
 
144
  st.write(f"Showing data for week {week_number}")
145
 
146
- display_data(df=filtered_df)
147
 
148
- elif view_level == "month":
149
- # get the latest dataframe
150
- df = get_data()
151
-
152
  # make a button to select the month, defaulting to current month
153
  month_str = st.selectbox("Select month", options=["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"])
154
  year_str = st.selectbox("Select year", options=["2024"])
 
 
 
155
 
156
  # Filter the dataframe for the desired month
157
  month_map = {
@@ -167,7 +166,7 @@ def main():
167
 
168
  st.write(f"Showing data for {month_str} {year_str}")
169
 
170
- display_data(df=filtered_df)
171
 
172
  elif selection == "Aggregated data":
173
 
 
2
 
3
  import streamlit as st
4
  import pandas as pd
 
5
  import matplotlib.pyplot as plt
6
 
 
7
  from load_dataframe import get_data
8
 
9
 
 
47
  st.pyplot(plt)
48
 
49
 
50
+ def show_data_editor(filtered_df: pd.DataFrame, key: str):
51
+ edited_df = st.data_editor(filtered_df,
52
  hide_index=True,
53
  column_order=("reached_out", "reached_out_link", "paper_page", "title", "github", "num_models", "num_datasets", "num_spaces"),
54
  column_config={"github": st.column_config.LinkColumn(),
 
57
  width=2000,
58
  key=key)
59
 
60
+ if edited_df is not None and not edited_df.equals(filtered_df):
61
+ # update the df of the session state with the affected rows
62
+ # TODO there seems to be a bug in here
63
+ original_df = st.session_state.df
64
+ original_df.update(edited_df)
65
+ st.session_state.df = original_df
66
 
67
 
68
+ def display_data(filtered_df: pd.DataFrame):
69
+ num_artifacts = filtered_df['has_artifact'].sum()
70
+ percentage_of_at_least_one_artifact = num_artifacts / filtered_df.shape[0] if filtered_df.shape[0] > 0 else 0
 
 
 
 
 
 
 
 
71
  percentage_of_at_least_one_artifact = round(percentage_of_at_least_one_artifact * 100, 2)
72
 
 
 
 
 
73
  st.markdown(f"""
74
  ## {percentage_of_at_least_one_artifact}% papers with at least one 🤗 artifact
75
 
76
+ * Number of papers: {filtered_df.shape[0]}
77
+ * Number of papers with a Github link: {(filtered_df['github'].values != '').sum()}
78
  * Number of papers with at least one HF artifact: {num_artifacts}
79
  """)
80
 
81
  st.write("Papers with at least one artifact")
82
+ show_data_editor(filtered_df=filtered_df[filtered_df['has_artifact']],
83
+ key="papers_with_artifacts")
84
 
85
  st.write("Papers without artifacts")
86
+ show_data_editor(filtered_df=filtered_df[~filtered_df['has_artifact']],
87
+ key="papers_without_artifacts")
88
 
89
  st.write("Papers with a HF mention in README but no artifacts")
90
+ show_data_editor(filtered_df=filtered_df[(filtered_df['hf_mention'] == 1) & (~filtered_df['has_artifact'])],
91
+ key="papers_with_hf_mention_no_artifacts")
92
 
93
 
94
  def main():
 
98
  st.sidebar.title("Navigation")
99
  selection = st.sidebar.selectbox("Go to", ["Daily/weekly/monthly data", "Aggregated data"])
100
 
101
+ # Initialize session state
102
+ if 'df' not in st.session_state:
103
+ df = get_data()
104
+ # add has_artifact, reached out and reached out link columns
105
+ # TODO remove since this will overwrite everything if we have added data before
106
+ df['has_artifact'] = (df['num_models'] > 0) | (df['num_datasets'] > 0) | (df['num_spaces'] > 0)
107
+ df['reached_out'] = [False for _ in range(df.shape[0])]
108
+ df["reached_out_link"] = ["" for _ in range(df.shape[0])]
109
+ st.session_state.df = df
110
+
111
  if selection == "Daily/weekly/monthly data":
112
  # Button to select day, month or week
113
  # Add streamlit selectbox.
114
  view_level = st.selectbox(label="View data per day, week or month", options=["day", "week", "month"])
115
 
116
  if view_level == "day":
 
 
 
117
  # make a button to select the day, defaulting to today
118
  day = st.date_input("Select day", value="today", format="DD/MM/YYYY")
119
  # convert to the day of a Pandas Timestamp
120
  day = pd.Timestamp(day)
121
 
122
+ # fetch df from session state
123
+ df = st.session_state.df
124
+
125
  filtered_df = df[df.index.date == day.date()]
126
 
127
  st.write(f"Showing data for {day.day_name()} {day.strftime('%d/%m/%Y')}")
128
+ display_data(filtered_df=filtered_df)
129
 
130
+ elif view_level == "week":
 
 
 
131
  # make a button to select the week
132
  week_number = st.number_input("Select week", value=datetime.today().isocalendar()[1], min_value=1, max_value=52)
133
+
134
+ # fetch df from session state
135
+ df = st.session_state.df
136
 
137
  # Extract week number from the index
138
  df['week'] = df.index.isocalendar().week
 
142
 
143
  st.write(f"Showing data for week {week_number}")
144
 
145
+ display_data(filtered_df=filtered_df)
146
 
147
+ elif view_level == "month":
 
 
 
148
  # make a button to select the month, defaulting to current month
149
  month_str = st.selectbox("Select month", options=["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"])
150
  year_str = st.selectbox("Select year", options=["2024"])
151
+
152
+ # fetch df from session state
153
+ df = st.session_state.df
154
 
155
  # Filter the dataframe for the desired month
156
  month_map = {
 
166
 
167
  st.write(f"Showing data for {month_str} {year_str}")
168
 
169
+ display_data(filtered_df=filtered_df)
170
 
171
  elif selection == "Aggregated data":
172