Spaces:

Multichem-PD
/

DFS_Contest_Analyzer

Running

DFS_Contest_Analyzer / global_func /find_csv_mismatches.py

James McCool

Add functionality for player name cleaning and CSV mismatch detection

d04558f 3 months ago

4.28 kB

	import streamlit as st
	import numpy as np
	import pandas as pd
	from fuzzywuzzy import process

	def find_csv_mismatches(csv_df: pd.DataFrame, projections_df: pd.DataFrame) -> pd.DataFrame:
	    """Reconcile player names in a CSV with the names used by the projections.

	    Names present in ``csv_df['Name']`` but absent from
	    ``projections_df['player_names']`` are fuzzy-matched against the
	    projection names. Matches scoring exactly 100 are applied automatically;
	    the rest are queued in ``st.session_state`` and presented one per
	    Streamlit rerun so the user can pick a replacement or reject all
	    suggestions.

	    Args:
	        csv_df: DataFrame expected to contain a ``Name`` column. It is copied;
	            the caller's frame is never modified.
	        projections_df: DataFrame expected to contain a ``player_names``
	            column. It is only read.

	    Returns:
	        A copy of ``csv_df`` with every rename decided during this run
	        applied. If a required column is missing, an error is shown and the
	        unmodified copy is returned.

	    Side effects:
	        Writes Streamlit messages and widgets. Reads and writes the session
	        keys ``csv_current_player_index`` and ``csv_players_to_process``, and
	        stores the updated frame under ``csv_file`` when a manual selection is
	        confirmed, followed by ``st.rerun()``.
	    """
	    # Work on a copy so the caller's DataFrame is left untouched.
	    csv_df = csv_df.copy()

	    if 'Name' not in csv_df.columns:
	        st.error("No 'Name' column found in CSV file")
	        return csv_df

	    if 'player_names' not in projections_df.columns:
	        st.error("No 'player_names' column found in projections file")
	        return csv_df

	    # Unique, non-null names on both sides.
	    csv_players = set(csv_df['Name'].dropna().unique())
	    projection_players = set(projections_df['player_names'].dropna().unique())
	    # Fuzzy-match candidates must be projection names, and only strings can
	    # be scored by fuzzywuzzy.
	    projection_players_list = [
	        name for name in projection_players if isinstance(name, str)
	    ]

	    # Players in the CSV that are missing from the projections. Sorted so the
	    # review order is stable instead of depending on set iteration order.
	    players_missing_from_projections = sorted(
	        csv_players - projection_players, key=str
	    )

	    # Automatically handle 100% matches before starting the interactive process.
	    players_to_process = []
	    for player in players_missing_from_projections:
	        if not isinstance(player, str):
	            st.warning(f"Skipping non-string value: {player}")
	            continue
	        # Skip the lookup when there are no candidates to compare against.
	        best_matches = (
	            process.extract(player, projection_players_list, limit=1)
	            if projection_players_list
	            else []
	        )
	        if best_matches and best_matches[0][1] == 100:
	            match_name = best_matches[0][0]
	            # Update the CSV DataFrame to use the projection name.
	            csv_df.loc[csv_df['Name'] == player, 'Name'] = match_name
	            st.success(f"Automatically matched '{player}' with '{match_name}' (100% match)")
	        else:
	            players_to_process.append(player)

	    # The review queue is created only once per session (when the index key is
	    # absent); later reruns keep walking the stored queue.
	    if 'csv_current_player_index' not in st.session_state:
	        st.session_state.csv_current_player_index = 0
	        st.session_state.csv_players_to_process = players_to_process

	    if not players_missing_from_projections:
	        st.success("All CSV players found in projections!")
	        return csv_df

	    st.warning("Players in CSV but missing from projections")

	    queue = st.session_state.csv_players_to_process
	    current_index = st.session_state.csv_current_player_index

	    # Display remaining players.
	    remaining_players = queue[current_index:]
	    st.info(f"Remaining players to process ({len(remaining_players)}):\n" +
	            "\n".join(f"- {player}" for player in remaining_players))

	    if current_index < len(queue):
	        current_player = queue[current_index]

	        # Find the top 3 closest projection names.
	        closest_matches = (
	            process.extract(current_player, projection_players_list, limit=3)
	            if projection_players_list
	            else []
	        )

	        st.write(f"Missing Player {current_index + 1} of {len(queue)}: {current_player}")

	        # Map each radio label back to its name so the selection never has to
	        # be parsed out of the label text (names may themselves contain " (").
	        label_to_name = {
	            f"{match[0]} ({match[1]}%)": match[0] for match in closest_matches
	        }
	        options = list(label_to_name)
	        options.append("None of these")

	        selected_option = st.radio(
	            "Select correct match:",
	            options,
	            key=f"csv_radio_{current_player}"
	        )

	        if st.button("Confirm Selection", key="csv_confirm"):
	            selected_name = label_to_name.get(selected_option)
	            if selected_name is not None:
	                # Update the CSV DataFrame and persist it for the next rerun.
	                csv_df.loc[csv_df['Name'] == current_player, 'Name'] = selected_name
	                st.success(f"Replaced '{current_player}' with '{selected_name}'")
	                st.session_state['csv_file'] = csv_df

	            # Move to the next player.
	            st.session_state.csv_current_player_index += 1
	            st.rerun()
	    else:
	        st.success("All players have been processed!")
	        # Reset the index for future runs.
	        st.session_state.csv_current_player_index = 0
	        st.session_state.csv_players_to_process = []

	    return csv_df