James McCool committed
Commit · 62a6685
1 Parent(s): 2ceda65

Add unique and under-5/under-10 duplicate counts to duplication frame in app.py
- Introduced calculations for 'uniques', 'under_5', and 'under_10' metrics in the working DataFrame, enhancing the analysis of duplicate lineups (illustrated in the sketch below).
- Updated the duplication frame to include these new metrics, improving data clarity and analysis capabilities.
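
To make the new metrics concrete, here is a minimal sketch on invented toy data (the entrant names, lineup strings, and values are hypothetical; only the column names and the groupby pattern come from this commit). 'dupes' counts how many entries share a lineup field-wide; 'uniques' counts, per BaseName, that entrant's lineups nobody else duplicated, and 'under_5'/'under_10' would count lineups with at most 5/10 copies the same way:

# Toy illustration of the new per-entrant duplicate metrics (hypothetical data).
import pandas as pd

working_df = pd.DataFrame({
    'BaseName': ['alice', 'alice', 'alice', 'bob', 'bob', 'bob'],
    'sorted':   ['A-B-C', 'A-B-C', 'C-D-E', 'A-B-C', 'F-G-H', 'X-Y-Z'],
})

# Field-wide duplicate count per lineup, as in the commit (selecting the
# 'sorted' column first keeps the assignment a single aligned Series).
working_df['dupes'] = working_df.groupby('sorted')['sorted'].transform('size')

# Per-entrant count of fully unique lineups, broadcast back to every row
# of that entrant via reindex on the BaseName labels.
working_df['uniques'] = working_df.groupby('BaseName').apply(
    lambda x: (x['dupes'] == 1).sum()
).reindex(working_df['BaseName']).values

print(working_df)
#   BaseName sorted  dupes  uniques
# 0    alice  A-B-C      3        1
# 1    alice  A-B-C      3        1
# 2    alice  C-D-E      1        1
# 3      bob  A-B-C      3        2
# 4      bob  F-G-H      1        2
# 5      bob  X-Y-Z      1        2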
app.py
CHANGED
@@ -187,6 +187,19 @@ with tab2:
                 axis=1
             )
             working_df['dupes'] = working_df.groupby('sorted').transform('size')
+
+            working_df['uniques'] = working_df.groupby('BaseName').apply(
+                lambda x: (x['dupes'] == 1).sum()
+            ).reindex(working_df['BaseName']).values
+
+            working_df['under_5'] = working_df.groupby('BaseName').apply(
+                lambda x: (x['dupes'] <= 5).sum()
+            ).reindex(working_df['BaseName']).values
+
+            working_df['under_10'] = working_df.groupby('BaseName').apply(
+                lambda x: (x['dupes'] <= 10).sum()
+            ).reindex(working_df['BaseName']).values
+
             working_df = working_df.reset_index()
             working_df['percentile_finish'] = working_df['index'].rank(pct=True)
             working_df['finish'] = working_df['index']
@@ -433,8 +446,8 @@ with tab2:
             dupe_frame['uniques%'] = dupe_frame['uniques'] / dupe_frame['EntryCount']
             dupe_frame['under_5%'] = dupe_frame['under_5'] / dupe_frame['EntryCount']
             dupe_frame['under_10%'] = dupe_frame['under_10'] / dupe_frame['EntryCount']
-            st.session_state['duplication_frame'] = dupe_frame[['BaseName', 'EntryCount', 'average_dupes', 'uniques', 'uniques%', 'under_5', 'under_5%', 'under_10', 'under_10%']].drop_duplicates(subset='BaseName', keep='first')
+            st.session_state['duplication_frame'] = dupe_frame[['BaseName', 'EntryCount', 'average_dupes', 'dupes', 'uniques', 'uniques%', 'under_5', 'under_5%', 'under_10', 'under_10%']].drop_duplicates(subset='BaseName', keep='first')
             st.dataframe(st.session_state['duplication_frame'].style.
                          background_gradient(cmap='RdYlGn', subset=['uniques%', 'under_5%', 'under_10%'], axis=0).
-                         background_gradient(cmap='RdYlGn_r', subset=['
+                         background_gradient(cmap='RdYlGn_r', subset=['uniques', 'under_5', 'under_10'], axis=0).
                          format(precision=2), hide_index=True)
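
A side note on the pattern in the first hunk (an observation, not part of the commit): each groupby('BaseName').apply(...).reindex(working_df['BaseName']).values round-trip computes one scalar per group and then broadcasts it back to every row of that group. groupby(...)['dupes'].transform(...) produces the same aligned result in a single step, as this toy check sketches:

# Equivalence check on hypothetical data: apply + reindex vs. transform.
import pandas as pd

df = pd.DataFrame({'BaseName': ['a', 'a', 'b'], 'dupes': [1, 3, 1]})

# Per-group scalar, then broadcast by reindexing on the group labels.
via_apply = df.groupby('BaseName').apply(
    lambda x: (x['dupes'] == 1).sum()
).reindex(df['BaseName']).values

# transform broadcasts the per-group scalar automatically and stays
# aligned with df's original index.
via_transform = df.groupby('BaseName')['dupes'].transform(
    lambda s: (s == 1).sum()
).values

assert (via_apply == via_transform).all()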
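
And a minimal sketch (toy frame with hypothetical values) of the chained Styler the second hunk builds: the standard RdYlGn colormap shades the percentage columns, the reversed RdYlGn_r colormap shades the raw count columns, and format(precision=2) rounds the displayed values:

# Toy version of the two-gradient Styler chain used above.
import pandas as pd

frame = pd.DataFrame({'uniques': [2, 8], 'uniques%': [0.10, 0.80]})

styled = (frame.style
          .background_gradient(cmap='RdYlGn', subset=['uniques%'], axis=0)
          .background_gradient(cmap='RdYlGn_r', subset=['uniques'], axis=0)
          .format(precision=2))

# In the app, the full Styler is passed straight to
# st.dataframe(..., hide_index=True) for display.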