James McCool
committed on
Commit
·
d57d310
1
Parent(s):
7984d0e
Update duplication frame in app.py to include 'dupes' and 'average_dupes' metrics
Browse files
- Modified the duplication frame to include 'dupes' alongside 'EntryCount' for clarity.
- Added 'average_dupes' column to calculate the mean of duplicates per BaseName.
- Adjusted the DataFrame to retain relevant columns and remove duplicates based on 'BaseName', enhancing data analysis.
app.py
CHANGED
@@ -424,7 +424,7 @@ with tab2:
|
|
424 |
st.dataframe(st.session_state['general_frame'].style.background_gradient(cmap='RdYlGn', axis=1).format(precision=2), hide_index=True)
|
425 |
|
426 |
with tab5:
|
427 |
-
st.session_state['duplication_frame'] = working_df[['BaseName', 'EntryCount', 'uniques', 'under_5', 'under_10']]
|
428 |
st.session_state['duplication_frame']['average_dupes'] = st.session_state['duplication_frame'].groupby('BaseName')['dupes'].mean()
|
429 |
-
st.session_state['duplication_frame'] = st.session_state['duplication_frame'].drop_duplicates(subset='BaseName', keep='first')
|
430 |
st.dataframe(st.session_state['duplication_frame'].style.background_gradient(cmap='RdYlGn_r', axis=1).format(precision=2), hide_index=True)
|
|
|
424 |
st.dataframe(st.session_state['general_frame'].style.background_gradient(cmap='RdYlGn', axis=1).format(precision=2), hide_index=True)
|
425 |
|
426 |
with tab5:
|
427 |
+
st.session_state['duplication_frame'] = working_df[['BaseName', 'EntryCount', 'dupes', 'uniques', 'under_5', 'under_10']]
|
428 |
st.session_state['duplication_frame']['average_dupes'] = st.session_state['duplication_frame'].groupby('BaseName')['dupes'].mean()
|
429 |
+
st.session_state['duplication_frame'] = st.session_state['duplication_frame'][['BaseName', 'EntryCount', 'average_dupes', 'uniques', 'under_5', 'under_10']].drop_duplicates(subset='BaseName', keep='first')
|
430 |
st.dataframe(st.session_state['duplication_frame'].style.background_gradient(cmap='RdYlGn_r', axis=1).format(precision=2), hide_index=True)
|