import pandas as pd
import copy
import os
import gradio as gr
from collections import Counter
import random
import re
from datetime import date
import supabase
import json

###### OG FUNCTIONS TO GENERATE SCHEDULES ######

# CONSTANTS
NAME_COL = 'Juggler_Name'
NUM_WORKSHOPS_COL = 'Num_Workshops'
AVAIL_COL = 'Availability'
DESCRIP_COL = 'Workshop_Descriptions'
DELIMITER = ';'


class Schedule:
    """An assignment of instructors to timeslots, with running totals.

    Attributes:
        timeslots: dict mapping a timeslot label to the list of instructors
            teaching in it. The dict passed to the constructor is stored as-is
            (not copied) and is mutated by add()/remove().
        num_timeslots_filled: number of timeslots with at least one instructor.
        total_num_workshops: total number of instructor entries over all slots.
    """

    def __init__(self, timeslots: dict):
        self.timeslots = timeslots
        # Derive the counters from whatever is already in the dict so that a
        # pre-filled schedule starts with consistent totals.
        self.num_timeslots_filled = sum(
            1 for instructors in timeslots.values() if len(instructors) > 0
        )
        self.total_num_workshops = sum(
            len(instructors) for instructors in timeslots.values()
        )

    def __repr__(self) -> str:
        return (
            f"{type(self).__name__}("
            f"num_timeslots_filled={self.num_timeslots_filled}, "
            f"total_num_workshops={self.total_num_workshops}, "
            f"timeslots={self.timeslots!r})"
        )

    def add(self, person: str, time: str):
        """Put `person` into the slot `time` and update both counters."""
        self.total_num_workshops += 1
        # The slot goes from empty to filled with this insertion.
        if len(self.timeslots[time]) == 0:
            self.num_timeslots_filled += 1
        self.timeslots[time].append(person)

    def remove(self, person: str, time: str):
        """Take `person` out of the slot `time` and update both counters.

        Raises:
            ValueError: if `person` is not in that slot (from list.remove).
        """
        self.total_num_workshops -= 1
        # The slot becomes empty once its only instructor is removed.
        if len(self.timeslots[time]) == 1:
            self.num_timeslots_filled -= 1
        self.timeslots[time].remove(person)

    def print(self):
        """Print the two counters followed by one line per timeslot."""
        print(f"# timeslots filled: {self.num_timeslots_filled}")
        print(f"# workshops: {self.total_num_workshops}")
        for time, instructors in self.timeslots.items():
            print(f"{time}: {', '.join(instructors)}")


def can_teach(person: str, slot: list, capacity: int) -> bool:
    """Return True if `person` can be added to `slot`, and False otherwise.

    Args:
        person: instructor name.
        slot: list of instructors already assigned to the timeslot.
        capacity: maximum number of instructors allowed in one timeslot.
    """
    # The slot is full
    if len(slot) >= capacity:
        return False
    # No one can teach two workshops at once
    return person not in slot


def convert_df(df, num_timeslots: int):
    """Extract the scheduling inputs from the DataFrame of form responses.

    Args:
        df: DataFrame with the columns NAME_COL, AVAIL_COL (a
            DELIMITER-separated string of timeslots) and NUM_WORKSHOPS_COL.
        num_timeslots: total number of distinct timeslots; a person who lists
            exactly this many entries is treated as available at any time.

    Returns:
        A dict with three keys:
            'people': list of names, each repeated once per workshop the
                person wants to teach, people wanting fewer workshops first.
            'availability': dict name -> list of stripped timeslots, ordered
                from the least to the most available person.
            'completely_available': names of people who can teach any time;
                they appear in neither of the other two values.
    """
    # Key: person's name. Value: a list of their availability
    availability = {}
    # Key: person's name. Value: how many workshops they want to teach
    pref_dict = {}
    # Instructors who can teach anytime
    completely_available = []

    # Iterate over the rows themselves rather than range(len(df)), so the
    # function does not depend on the frame having a default 0..n-1 index.
    for _, row in df.iterrows():
        name = row[NAME_COL]
        curr_avail = row[AVAIL_COL].split(DELIMITER)
        if len(curr_avail) == num_timeslots:
            completely_available.append(name)
        else:
            availability[name] = [elem.strip() for elem in curr_avail]
            # Coerce to a plain int: the value is used with range-like
            # repetition below, which fails for strings and floats.
            pref_dict[name] = int(row[NUM_WORKSHOPS_COL])

    # Start with the people who are the LEAST available to teach, then put the
    # more available instructors into the remaining slots.
    order = sorted(availability, key=lambda k: len(availability[k]))
    new_avail_dict = {instructor: availability[instructor] for instructor in order}

    # People who want to teach fewer workshops come first. Anyone teaching
    # several workshops is listed once per workshop.
    people = []
    for name, number in sorted(pref_dict.items(), key=lambda item: item[1]):
        people.extend([name] * number)

    return {
        'people': people,
        'availability': new_avail_dict,
        'completely_available': completely_available,
    }


# Makes a dictionary where each key is a timeslot and each value is a list.
# If there's no partial schedule, each list will be empty.
# If there's a partial schedule, each list will include the people teaching during that slot.
def initialize_timeslots(df) -> dict:
    """Build an empty schedule skeleton from every timeslot in the responses.

    Args:
        df: DataFrame whose AVAIL_COL holds DELIMITER-separated timeslots.

    Returns:
        A dict mapping each distinct, whitespace-stripped timeslot to a new
        empty list. Key order comes from set iteration and carries no meaning;
        my_sort() imposes the display order later.
    """
    all_timeslots = {
        slot.strip()
        for entry in df[AVAIL_COL]
        for slot in entry.split(DELIMITER)
    }
    return {slot: [] for slot in all_timeslots}


def find_all_schedules(people: list, availability: dict, schedule_obj: Schedule, capacity: int, schedules: list, max_timeslots_list: list, max_workshops_list: list) -> None:
    """Recursively enumerate schedules by choose / explore / unchoose.

    Args:
        people: names still to be placed (a name may appear several times).
        availability: dict name -> list of timeslots the person can teach.
        schedule_obj: the schedule being built; mutated during the search and
            restored to its incoming state before returning.
        capacity: maximum number of instructors per timeslot.
        schedules: output list; receives a deep copy of every state whose
            number of filled timeslots ties or beats the best seen so far.
        max_timeslots_list: one-element list holding the best number of
            filled timeslots seen so far (updated in place).
        max_workshops_list: one-element list holding the best workshop total
            among the recorded states (updated in place).
    """
    if schedule_obj.num_timeslots_filled >= max_timeslots_list[0]:
        max_timeslots_list[0] = schedule_obj.num_timeslots_filled
        # Keep track of total number of workshops taught
        if schedule_obj.total_num_workshops >= max_workshops_list[0]:
            max_workshops_list[0] = schedule_obj.total_num_workshops
        # Deep copy: schedule_obj keeps being mutated by the search.
        schedules.append(copy.deepcopy(schedule_obj))

    # Base case
    if len(people) == 0:
        return

    # Recursive cases
    person = people[0]
    remaining = people[1:]  # already [] when `person` is the last one
    for time in availability[person]:
        if can_teach(person, schedule_obj.timeslots[time], capacity):
            # Choose (put that person in that timeslot)
            schedule_obj.add(person, time)
            # Explore (assign everyone else to timeslots based on that decision)
            find_all_schedules(remaining, availability, schedule_obj, capacity, schedules, max_timeslots_list, max_workshops_list)
            # Unchoose (remove that person from the timeslot)
            schedule_obj.remove(person, time)
        else:
            # NOTE: this skips the person for this slot, so it will not
            # generate a full timeslot, but could still lead to a good schedule
            find_all_schedules(remaining, availability, schedule_obj, capacity, schedules, max_timeslots_list, max_workshops_list)


def my_sort(curr_sched: dict, og_slots: list):
    """Return the schedule with its timeslots in the order given by og_slots.

    Example input: {'4 pm': ['logan', 'andrew'], '1 pm': ['graham', 'joyce']}

    Slots listed in og_slots but missing from curr_sched map to an empty list;
    slots in curr_sched that are not in og_slots are left out.
    """
    return {slot: curr_sched.get(slot, []) for slot in og_slots}


def _format_instructors(instructors: list, descrip_dict: dict) -> str:
    """Render the "Instructor(s)" cell for one timeslot."""
    if not instructors:
        return 'N/A'
    if len(descrip_dict) == 0:
        return "; ".join(instructors)
    entries = []
    for person in instructors:
        descrip = descrip_dict.get(person, "Workshop")
        # A multi-line description (several workshops) starts on its own line
        if '\n' in descrip:
            entries.append(f"- {person}:\n{descrip}")
        else:
            entries.append(f"- {person}: {descrip}")
    return "\n\n".join(entries).strip()


def make_df(schedules: list, descrip_dict: dict, og_slots: list):
    """Make an organized DataFrame given a list of schedules.

    Args:
        schedules: list of timeslot dicts (slot -> list of instructors).
        descrip_dict: dict instructor -> workshop description; when empty,
            only the instructor names are listed.
        og_slots: timeslots in display order.

    Returns:
        (DataFrame with the columns "Schedule" and "Instructor(s)",
         number of schedules written).
        A timeslot without instructors is shown as 'N/A'.
    """
    all_times = []
    all_instructors = []
    for number, curr_sched in enumerate(schedules, start=1):
        # Include an empty row between schedules
        if number != 1:
            all_times.append("")
            all_instructors.append("")
        all_times.append(f"Schedule #{number}")
        all_instructors.append("")
        # One row per timeslot, keeping both columns the same length
        for slot, instructors in my_sort(curr_sched, og_slots).items():
            all_times.append(slot)
            all_instructors.append(_format_instructors(instructors, descrip_dict))

    new_df = pd.DataFrame({
        "Schedule": all_times,
        "Instructor(s)": all_instructors
    })
    new_df['Instructor(s)'] = new_df['Instructor(s)'].astype(str)
    return new_df, len(schedules)


def get_description_dict(df):
    """Map each instructor's name (NAME_COL) to their DESCRIP_COL value.

    If a name occurs in several rows, the last row wins.
    """
    return dict(zip(df[NAME_COL], df[DESCRIP_COL]))


def classify_schedules(people: list, schedules: list, total_timeslots: int, max_timeslots_filled: int) -> tuple:
    """Split schedules into complete ("valid") and incomplete ones.

    Complete = everyone is teaching their desired number of workshops AND
    every one of the `total_timeslots` timeslots has at least one workshop.
    Only schedules filling exactly `max_timeslots_filled` timeslots are
    considered; the others are ignored.

    Args:
        people: names, each repeated once per workshop the person wants.
        schedules: Schedule objects to classify.
        total_timeslots: number of timeslots that exist.
        max_timeslots_filled: the best number of filled timeslots found.

    Returns:
        (valid_schedules, []) if at least one complete schedule exists,
        otherwise ([], incomplete_schedules). The second list is empty when
        no schedule qualified at all.

    Raises:
        Exception: if a schedule contains an instructor not in `people`.
    """
    # Key: person. Value: number of workshops they WANT to teach
    pref_dict = Counter(people)
    valid_schedules = []
    incomplete_schedules = []

    for sched in schedules:
        if sched.num_timeslots_filled != max_timeslots_filled:
            continue

        # Key: person. Value: how many workshops they're ACTUALLY teaching
        freq_dict = dict.fromkeys(pref_dict, 0)
        for instructor_list in sched.timeslots.values():
            for instructor in instructor_list:
                if instructor not in freq_dict:
                    raise Exception("There is a serious issue!!!!")
                freq_dict[instructor] += 1

        # See if everyone is teaching their desired number of workshops
        everyone_is_teaching = all(
            freq == pref_dict[teacher] for teacher, freq in freq_dict.items()
        )
        filled_all_timeslots = (sched.num_timeslots_filled == total_timeslots)

        if everyone_is_teaching and filled_all_timeslots:
            valid_schedules.append(sched)
        elif not valid_schedules:
            # No need to keep incomplete schedules once a valid one exists
            incomplete_schedules.append(sched)

    if valid_schedules:
        return valid_schedules, []
    return [], incomplete_schedules


def get_best_schedules(schedules: list, cutoff: str, max_workshops: int) -> list:
    """Return the timeslot dicts of the schedules with the most workshops.

    Args:
        schedules: Schedule objects that fill the max number of timeslots.
        cutoff: maximum number of results (anything int() accepts); a
            negative value such as -1 means "return all of them".
        max_workshops: only schedules with exactly this workshop total are kept.

    Returns:
        A list of distinct timeslot dicts. To make it less overwhelming, at
        most `cutoff` of them are returned, chosen at random.
    """
    cutoff = int(cutoff)
    seen = set()
    best_schedules = []
    for sched in schedules:
        if sched.total_num_workshops != max_workshops:
            continue
        # Compare by content: Schedule defines no __eq__, so a plain
        # membership test would never recognise a duplicate.
        key = frozenset(
            (slot, tuple(sorted(instructors)))
            for slot, instructors in sched.timeslots.items()
        )
        if key in seen:
            continue
        seen.add(key)
        best_schedules.append(sched.timeslots)

    if cutoff < 0 or len(best_schedules) <= cutoff:
        return best_schedules
    # Sample without replacement
    return random.sample(best_schedules, cutoff)


def main(df, capacity: int, num_results: int, og_slots: list):
    """Big wrapper function that calls the other functions.

    Args:
        df: DataFrame of form responses.
        capacity: maximum number of workshops per timeslot.
        num_results: how many schedules to return (-1 for all of them).
        og_slots: timeslots in display order.

    Returns:
        (message for the user, DataFrame of schedules, path of the CSV file
        written to the current working directory).
    """
    capacity = int(capacity)
    descrip_dict = get_description_dict(df)
    timeslots = initialize_timeslots(df)
    total_timeslots = len(timeslots)
    schedule_obj = Schedule(timeslots)

    # Convert the df with everyone's availability to a usable format
    res = convert_df(df, total_timeslots)
    people = res['people']
    availability = res['availability']
    completely_available = res['completely_available']

    # First attempt: only record schedules that cover every slot mentioned by
    # the partially-available instructors.
    distinct_slots = set()
    for slots in availability.values():
        distinct_slots.update(slots)
    num_distinct_slots = len(distinct_slots)

    schedules = []
    max_timeslots_list = [num_distinct_slots]
    max_workshops_list = [num_distinct_slots]
    find_all_schedules(people, availability, schedule_obj, capacity, schedules, max_timeslots_list, max_workshops_list)

    # If that coverage is impossible (e.g. fewer workshops than slots),
    # nothing was recorded: search again without the lower bound so the best
    # partial schedules are returned instead of failing.
    if not schedules:
        max_timeslots_list = [0]
        max_workshops_list = [0]
        find_all_schedules(people, availability, schedule_obj, capacity, schedules, max_timeslots_list, max_workshops_list)

    valid_schedules, decent_schedules = classify_schedules(people, schedules, total_timeslots, max_timeslots_list[0])

    # Format "BTW" message
    if len(completely_available) == 0:
        btw = ''
    else:
        if len(completely_available) == 1:
            names = completely_available[0]
        elif len(completely_available) == 2:
            names = f"{completely_available[0]} and {completely_available[1]}"
        else:
            names = ', '.join(completely_available)
        btw = f"BTW, {names} can teach any time."
        btw += " They are NOT in any of the schedules below. Put them where you want!"

    # Return schedules
    candidates = valid_schedules if len(valid_schedules) > 0 else decent_schedules
    # Take the workshop maximum from the candidates themselves, so it always
    # refers to schedules that fill the max number of timeslots.
    max_workshops = max((sched.total_num_workshops for sched in candidates), default=0)
    best_schedules = get_best_schedules(candidates, num_results, max_workshops)
    new_df, count = make_df(best_schedules, descrip_dict, og_slots)

    if len(valid_schedules) > 0:
        if count == 1:
            results = f"Good news! I was able to make a complete schedule. {btw}"
        else:
            results = f"Good news! I was able to make multiple complete schedules. {btw}"
    else:
        if count == 1:
            results = f"Here is the best option. {btw}"
        else:
            results = f"Here are the best options. {btw}"

    path = os.path.join(os.path.abspath(os.getcwd()), "schedule.csv")
    new_df.to_csv(path, index=False)
    return results, new_df, path


##### ALL THE NEW STUFF WITH SUPABASE ETC. #####

### CONSTANTS ###
NAME_COL = 'Juggler_Name'
NUM_WORKSHOPS_COL = 'Num_Workshops'
AVAIL_COL = 'Availability'
DESCRIP_COL = 'Workshop_Descriptions'
EMAIL_COL = 'Email'
DELIMITER = ';'
ALERT_TIME = None  # leave warnings on screen indefinitely
FORM_NOT_FOUND = 'Form not found'
INCORRECT_PASSWORD = "The password is incorrect. Please check the password and try again. If you don't remember your password, please email jugglinggym@gmail.com."
NUM_ROWS = 1
NUM_COLS_SCHEDULES = 2
NUM_COLS_ALL_RESPONSES = 4
NUM_RESULTS = 10  # randomly get {NUM_RESULTS} results

# NOTE(review): `theme` is defined here but is not passed to gr.Blocks() below.
theme = gr.themes.Soft(
    primary_hue="cyan",
    secondary_hue="pink",
    font=[gr.themes.GoogleFont('sans-serif'), 'ui-sans-serif', 'system-ui', 'Montserrat'],
)

### Connect to Supabase ###
# The environment variables URL / API_KEY take precedence when set; the
# literals are only the fallback, so existing deployments keep working.
URL = os.environ.get('URL', 'https://ubngctgvhjgxkvimdmri.supabase.co')
API_KEY = os.environ.get('API_KEY', 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6InVibmdjdGd2aGpneGt2aW1kbXJpIiwicm9sZSI6ImFub24iLCJpYXQiOjE3MzQ5MjAwOTQsImV4cCI6MjA1MDQ5NjA5NH0.NtGdfP8GYNuYdPdsaLW5GjgfB0_7Q1kNBIDJtPhO8nY')
client = supabase.create_client(URL, API_KEY)


### DEFINE FUNCTIONS ###

## Multi-purpose function ##

def standardize(schedule_name: str):
    """Return a lowercased and stripped version of the schedule name (str)."""
    return schedule_name.lower().strip()


## Functions to manage/generate schedules ##

def make_visible(schedule_name: str, password: str):
    """Use the name and password to get the form and reveal the page elements.

    Returns a 6-tuple, in the order of the click handler's outputs:
        gr.Button: corresponds to find_form_btn
        gr.Column: corresponds to all_responses_col
        gr.Column: corresponds to generate_schedules_explanation_col
        gr.Row: corresponds to generate_btns_row
        gr.Column: corresponds to open_close_btn_col
        gr.Button: corresponds to open_close_btn
    On any problem (missing input, unknown form, wrong password, unexpected
    status) a warning is shown and six bare components are returned.
    """
    skip_output = gr.Button(), gr.Column(), gr.Column(), gr.Row(), gr.Column(), gr.Button()

    if len(schedule_name) == 0:
        gr.Warning('Please enter the form name.', ALERT_TIME)
        return skip_output
    if len(password) == 0:
        gr.Warning('Please enter the password.', ALERT_TIME)
        return skip_output

    response = client.table('Forms').select('password', 'status').eq('form_name', standardize(schedule_name)).execute()
    data = response.data
    if len(data) == 0:
        gr.Warning(f"There is no form called \"{schedule_name}\". Please check the spelling and try again.", ALERT_TIME)
        return skip_output

    my_dict = data[0]
    if password != my_dict['password']:
        gr.Warning(INCORRECT_PASSWORD, ALERT_TIME)
        return skip_output

    status = my_dict['status']
    if status == 'open':
        gr.Info('', ALERT_TIME, title='Btw, the form is currently OPEN.')
        toggle_label = "Close Form"
    elif status == 'closed':
        gr.Info('', ALERT_TIME, title='Btw, the form is currently CLOSED.')
        toggle_label = "Open Form"
    else:
        # Always return one value per output, whatever the stored status is.
        gr.Warning('', ALERT_TIME, title=f'The form has an unexpected status: "{status}".')
        return skip_output

    return (
        gr.Button(variant='secondary'),
        gr.Column(visible=True),
        gr.Column(visible=True),
        gr.Row(visible=True),
        gr.Column(visible=True),
        gr.Button(toggle_label, visible=True),
    )


def make_blank_schedule():
    """Make a blank schedule that we can return to prevent things from breaking.

    Returns a tuple with 3 elements:
        0: str indicating that the form wasn't found
        1: the (empty) DataFrame
        2: the path of the CSV file the DataFrame was written to
    """
    df = pd.DataFrame({
        'Schedule': [],
        'Instructors': []
    })
    path = os.path.join(os.path.abspath(os.getcwd()), "schedule.csv")
    df.to_csv(path, index=False)
    return FORM_NOT_FOUND, df, path


def get_df_from_db(schedule_name: str, password: str):
    """Get the form responses from Supabase and convert them to a DataFrame.

    Returns:
        if found: a dictionary with three keys: capacity, df (DataFrame),
            and slots, the first and last taken from the stored row as-is
        if not found, or the password is wrong: FORM_NOT_FOUND (a warning is
            shown as well)
    """
    response = client.table('Forms').select('password', 'capacity', 'responses', 'slots').eq('form_name', standardize(schedule_name)).execute()
    data = response.data
    if len(data) == 0:
        gr.Warning(f"There is no form called \"{schedule_name}\". Please check the spelling and try again.", ALERT_TIME)
        return FORM_NOT_FOUND

    my_dict = data[0]
    if password != my_dict['password']:
        gr.Warning(INCORRECT_PASSWORD, ALERT_TIME)
        return FORM_NOT_FOUND

    # Convert to df. The responses are expected to be a JSON string; a missing
    # value or an already-decoded object is tolerated as well.
    raw_responses = my_dict['responses']
    if isinstance(raw_responses, (str, bytes, bytearray)):
        raw_responses = json.loads(raw_responses) if raw_responses else None
    df = pd.DataFrame(raw_responses)
    return {'capacity': my_dict['capacity'], 'df': df, 'slots': my_dict['slots']}


def get_all_responses(schedule_name: str, password: str):
    """Put all of the form responses into a DataFrame and write them to a CSV.

    Returns:
        gr.DataFrame showing the responses
        gr.File pointing at "all responses.csv" in the working directory
    """
    res = get_df_from_db(schedule_name, password)
    if res == FORM_NOT_FOUND:
        df = pd.DataFrame({
            NAME_COL: [],
            EMAIL_COL: [],
            NUM_WORKSHOPS_COL: [],
            AVAIL_COL: [],
            DESCRIP_COL: []
        })
    else:
        df = res['df']
        # Put a space after each delimiter to make the column easier to read.
        # An empty response set may come back without any columns.
        if AVAIL_COL in df.columns:
            df[AVAIL_COL] = [elem.replace(DELIMITER, f"{DELIMITER} ") for elem in df[AVAIL_COL].to_list()]

    path = os.path.join(os.path.abspath(os.getcwd()), "all responses.csv")
    df.to_csv(path, index=False)

    if len(df) == 0:
        gr.Warning('', ALERT_TIME, title='No one has filled out the form yet.')
    return gr.DataFrame(df, visible=True), gr.File(path, visible=True)


def _generate_schedules(schedule_name: str, password: str, num_results: int):
    """Shared implementation of the two generate_schedules_wrapper_* functions."""
    res = get_df_from_db(schedule_name, password)
    if res == FORM_NOT_FOUND:
        # Return blank schedule (should be impossible to get to this condition btw)
        to_return = make_blank_schedule()
        gr.Warning(FORM_NOT_FOUND, ALERT_TIME)
    elif len(res['df']) == 0:
        no_responses = 'No one has filled out the form yet.'
        gr.Warning('', ALERT_TIME, title=no_responses)
        # The form exists, so do not report "Form not found" in the textbox.
        _, blank_df, blank_path = make_blank_schedule()
        to_return = (no_responses, blank_df, blank_path)
    else:
        gr.Info('', ALERT_TIME, title='Working on generating schedules! Please DO NOT click anything on this page.')
        to_return = main(res['df'], res['capacity'], num_results, res['slots'])
        gr.Info('', ALERT_TIME, title=to_return[0])
    return gr.Textbox(to_return[0]), gr.DataFrame(to_return[1], visible=True), gr.File(to_return[2], visible=True)


def generate_schedules_wrapper_subset_results(schedule_name: str, password: str):
    """Generate the best possible schedules and return a random subset of them.

    (The same as generate_schedules_wrapper_all_results, except that this
    function returns at most NUM_RESULTS of them. They are two separate
    functions in order to work with Gradio.)

    Returns:
        gr.Textbox with the result message
        gr.DataFrame with the schedules
        gr.File with the path of the CSV file
    """
    return _generate_schedules(schedule_name, password, NUM_RESULTS)


def generate_schedules_wrapper_all_results(schedule_name: str, password: str):
    """Generate the best possible schedules and return ALL of them.

    (The same as generate_schedules_wrapper_subset_results, except that this
    function returns all of them. They are two separate functions in order to
    work with Gradio.)

    Returns:
        gr.Textbox with the result message
        gr.DataFrame with the schedules
        gr.File with the path of the CSV file
    """
    placeholder = -1  # get_best_schedules treats -1 as "no cutoff"
    return _generate_schedules(schedule_name, password, placeholder)


def toggle_btn(schedule_name: str, password: str):
    """Open/close a form and relabel the button accordingly.

    Returns:
        gr.Button: labelled with the next available action after a successful
        toggle, or an unchanged button if anything went wrong.
    """
    response = client.table('Forms').select('password', 'capacity', 'status').eq('form_name', standardize(schedule_name)).execute()
    data = response.data
    if len(data) == 0:
        gr.Warning('', ALERT_TIME, title=f"There was no form called \"{schedule_name}\". Please check the spelling and try again.")
        return gr.Button()

    my_dict = data[0]
    if password != my_dict['password']:
        gr.Warning(INCORRECT_PASSWORD, ALERT_TIME)
        # The output is the button itself: returning a string here would
        # replace its label.
        return gr.Button()

    curr_status = my_dict['status']
    if curr_status == 'open':
        client.table('Forms').update({'status': 'closed'}).eq('form_name', standardize(schedule_name)).execute()
        gr.Info('', ALERT_TIME, title="The form was closed successfully!")
        return gr.Button('Open Form')
    if curr_status == 'closed':
        client.table('Forms').update({'status': 'open'}).eq('form_name', standardize(schedule_name)).execute()
        gr.Info('', ALERT_TIME, title="The form was opened successfully!")
        return gr.Button('Close Form')

    # gr.Error is an exception and shows nothing unless raised; a warning
    # displays the message and still lets the button be returned.
    gr.Warning('', ALERT_TIME, title='An unexpected error has ocurred.')
    return gr.Button()


### GRADIO ###
with gr.Blocks() as demo:
    ### VIEW FORM RESULTS ###
    with gr.Tab('View Form Results'):
        with gr.Column() as btn_group:
            schedule_name = gr.Textbox(label="Form Name")
            password = gr.Textbox(label="Password")
            find_form_btn = gr.Button('Find Form', variant='primary')

        # 1. Get all responses
        with gr.Column(visible=False) as all_responses_col:
            gr.Markdown('# Download All Form Responses')
            gr.Markdown("Download everyone's responses to the form.")
            all_responses_btn = gr.Button('Download All Form Responses', variant='primary')
            with gr.Row() as all_responses_output_row:
                df_out = gr.DataFrame(
                    row_count=(NUM_ROWS, "dynamic"),
                    col_count=(NUM_COLS_ALL_RESPONSES, "dynamic"),
                    headers=[NAME_COL, NUM_WORKSHOPS_COL, AVAIL_COL, DESCRIP_COL],
                    wrap=True,
                    scale=4,
                    visible=False,
                )
                file_out = gr.File(label="Downloadable file", scale=1, visible=False)
            all_responses_btn.click(fn=get_all_responses, inputs=[schedule_name, password], outputs=[df_out, file_out])

        # 2. Generate schedules
        with gr.Column(visible=False) as generate_schedules_explanation_col:
            gr.Markdown('# Create Schedules based on Everyone\'s Preferences.')
        with gr.Row(visible=False) as generate_btns_row:
            generate_ten_results_btn = gr.Button('Generate a Subset of Schedules', variant='primary', visible=True)
            generate_all_results_btn = gr.Button('Generate All Possible Schedules', visible=True)
        with gr.Row(visible=True) as generated_schedules_output:
            text_out = gr.Textbox(label='Results')
            generated_df_out = gr.DataFrame(
                row_count=(NUM_ROWS, "dynamic"),
                col_count=(NUM_COLS_SCHEDULES, "dynamic"),
                headers=["Schedule", "Instructors"],
                wrap=True,
                scale=3,
                visible=False,
            )
            generated_file_out = gr.File(label="Downloadable schedule file", scale=1, visible=False)
        generate_ten_results_btn.click(fn=generate_schedules_wrapper_subset_results, inputs=[schedule_name, password], outputs=[text_out, generated_df_out, generated_file_out], api_name='generate_random_schedules')
        generate_all_results_btn.click(fn=generate_schedules_wrapper_all_results, inputs=[schedule_name, password], outputs=[text_out, generated_df_out, generated_file_out], api_name='generate_all_schedules')

        # 3. Open/close button
        with gr.Column(visible=False) as open_close_btn_col:
            gr.Markdown('# Open or Close Form')
            open_close_btn = gr.Button(variant='primary')
            open_close_btn.click(fn=toggle_btn, inputs=[schedule_name, password], outputs=[open_close_btn])

        find_form_btn.click(fn=make_visible, inputs=[schedule_name, password], outputs=[find_form_btn, all_responses_col, generate_schedules_explanation_col, generate_btns_row, open_close_btn_col, open_close_btn])

# NOTE(review): the generated CSV files are written to the working directory,
# which is why it is listed in allowed_paths. This presumably exposes every
# other file in that directory to the app's users as well - confirm whether a
# dedicated output folder should be used instead.
directory = os.path.abspath(os.getcwd())
allowed = directory  #+ "/schedules"
demo.launch(allowed_paths=[allowed], show_error=True)