from datetime import datetime

import pandas as pd
import streamlit as st

# from load_dataframe import get_data

# TODO use get_data() instead of reading a local CSV export.
CSV_PATH = '/Users/nielsrogge/Downloads/daily_papers_enriched (1).csv'

# Format used for the date index when a table is shown with its index.
DISPLAY_DATE_FORMAT = '%d-%m-%Y'

# Per-paper artifact counters; a paper "has an artifact" when any is > 0.
ARTIFACT_COLUMNS = ["num_models", "num_datasets", "num_spaces"]


def _load_papers(csv_path):
    """Read the papers CSV and return it indexed and sorted by its ``date`` column.

    The index is kept as real datetimes (not formatted strings) so that
    sorting is chronological and day/week/month filters can use the
    calendar accessors of the index.
    """
    df = pd.read_csv(csv_path)
    # Leftover positional index from a previous ``to_csv``; absent in some exports.
    df = df.drop(columns=['Unnamed: 0'], errors='ignore')
    df = df.set_index('date')
    df.index = pd.to_datetime(df.index)
    return df.sort_index()


def _filter_day(df, day):
    """Return the rows whose date falls on the calendar day ``day``."""
    idx = df.index
    mask = (idx.year == day.year) & (idx.month == day.month) & (idx.day == day.day)
    return df.loc[mask]


def _filter_week(df, iso_year, iso_week):
    """Return the rows whose date falls in the given ISO year and ISO week."""
    iso = df.index.isocalendar()
    # Compare the ISO year too: week numbers repeat every year.
    mask = (iso['year'] == iso_year) & (iso['week'] == iso_week)
    # Use a plain boolean array to avoid label alignment on a non-unique index.
    return df.loc[mask.to_numpy(dtype=bool, na_value=False)]


def _filter_month(df, year, month):
    """Return the rows whose date falls in the given calendar month of ``year``."""
    idx = df.index
    return df.loc[(idx.year == year) & (idx.month == month)]


def _with_display_index(df):
    """Return a copy of ``df`` whose datetime index is formatted as dd-mm-YYYY strings."""
    shown = df.copy()
    shown.index = shown.index.strftime(DISPLAY_DATE_FORMAT)
    return shown


def _render_day_report(df):
    """Show the KPI headline numbers and the paper table for a single day."""
    num_with_github = df['github'].notnull().sum()
    # Count papers (rows), not artifacts: a paper qualifies when any counter is > 0.
    num_with_artifact = (df[ARTIFACT_COLUMNS].fillna(0) > 0).any(axis=1).sum()

    st.markdown("\n".join([
        f"## Number of papers: {df.shape[0]}",
        f"#### Number of papers with a Github link: {num_with_github}",
        f"#### Number of papers with at least one HF artifact: {num_with_artifact}",
    ]))

    st.dataframe(
        df,
        hide_index=True,
        column_order=("paper_page", "title", "github", "num_models", "num_datasets", "num_spaces"),
        column_config={
            "github": st.column_config.LinkColumn(),
            "paper_page": st.column_config.LinkColumn(),
        },
        width=2000,
    )


# Main Streamlit app
def main():
    """Render the dashboard: pick a granularity and a date, then show the matching papers."""
    st.title("Hugging Face Papers KPI Dashboard")

    df = _load_papers(CSV_PATH)

    view_level = st.selectbox(
        label="View data per day, week or month",
        options=["day", "week", "month"],
    )

    # Display data based on aggregation level
    if view_level == "day":
        # Date picker defaulting to today.
        day = st.date_input("Select day", value="today", format="DD/MM/YYYY")
        df = _filter_day(df, day)

        st.write(f"Showing data for {day.strftime('%d/%m/%Y')}")
        _render_day_report(df)

    elif view_level == "week":
        # Any day inside the wanted week selects that ISO week.
        picked = st.sidebar.date_input("Select week", value="today", format="DD/MM/YYYY")
        iso_year, iso_week, _ = picked.isocalendar()
        df = _filter_week(df, iso_year, iso_week)

        st.write(f"Showing data for week {iso_week} of {iso_year}")
        st.dataframe(_with_display_index(df))

    elif view_level == "month":
        # Any day inside the wanted month selects that month.
        picked = st.sidebar.date_input("Select month", value="today", format="DD/MM/YYYY")
        df = _filter_month(df, picked.year, picked.month)

        st.write(f"Showing data for {pd.Timestamp(picked).month_name()} {picked.year}")
        st.dataframe(_with_display_index(df))


if __name__ == "__main__":
    main()