James McCool committed on
Commit
d57d310
·
1 Parent(s): 7984d0e

Update duplication frame in app.py to include 'dupes' and 'average_dupes' metrics

Browse files

- Modified the duplication frame's column selection to include 'dupes' alongside 'EntryCount', so the per-BaseName average can be computed from it.
- Added 'average_dupes' column to calculate the mean of duplicates per BaseName.
- Adjusted the final DataFrame to retain only 'BaseName', 'EntryCount', 'average_dupes', 'uniques', 'under_5', and 'under_10', and to drop duplicate rows based on 'BaseName' (keeping the first occurrence).

Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -424,7 +424,7 @@ with tab2:
424
  st.dataframe(st.session_state['general_frame'].style.background_gradient(cmap='RdYlGn', axis=1).format(precision=2), hide_index=True)
425
 
426
  with tab5:
427
- st.session_state['duplication_frame'] = working_df[['BaseName', 'EntryCount', 'uniques', 'under_5', 'under_10']]
428
  st.session_state['duplication_frame']['average_dupes'] = st.session_state['duplication_frame'].groupby('BaseName')['dupes'].mean()
429
- st.session_state['duplication_frame'] = st.session_state['duplication_frame'].drop_duplicates(subset='BaseName', keep='first')
430
  st.dataframe(st.session_state['duplication_frame'].style.background_gradient(cmap='RdYlGn_r', axis=1).format(precision=2), hide_index=True)
 
424
  st.dataframe(st.session_state['general_frame'].style.background_gradient(cmap='RdYlGn', axis=1).format(precision=2), hide_index=True)
425
 
426
  with tab5:
427
+ st.session_state['duplication_frame'] = working_df[['BaseName', 'EntryCount', 'dupes', 'uniques', 'under_5', 'under_10']]
428
  st.session_state['duplication_frame']['average_dupes'] = st.session_state['duplication_frame'].groupby('BaseName')['dupes'].mean()
429
+ st.session_state['duplication_frame'] = st.session_state['duplication_frame'][['BaseName', 'EntryCount', 'average_dupes', 'uniques', 'under_5', 'under_10']].drop_duplicates(subset='BaseName', keep='first')
430
  st.dataframe(st.session_state['duplication_frame'].style.background_gradient(cmap='RdYlGn_r', axis=1).format(precision=2), hide_index=True)