Update data_processor.py
Browse files - data_processor.py (+4 -19)
data_processor.py
CHANGED
@@ -8,6 +8,7 @@ class DataProcessor:
|
|
8 |
'Did the intervention happen today?',
|
9 |
'Did the intervention take place today?'
|
10 |
]
|
|
|
11 |
ENGAGED_STR = 'Engaged'
|
12 |
PARTIALLY_ENGAGED_STR = 'Partially Engaged'
|
13 |
NOT_ENGAGED_STR = 'Not Engaged'
|
@@ -55,22 +56,6 @@ class DataProcessor:
|
|
55 |
print(f"Error converting series to datetime: {e}")
|
56 |
return series
|
57 |
|
58 |
-
# def replace_student_names_with_initials(self, df):
|
59 |
-
# updated_columns = []
|
60 |
-
# for col in df.columns:
|
61 |
-
# if col.startswith('Student Attendance'):
|
62 |
-
# match = re.match(r'Student Attendance \[(.+?)\]', col)
|
63 |
-
# if match:
|
64 |
-
# name = match.group(1)
|
65 |
-
# initials = ''.join([part[0] for part in name.split()])
|
66 |
-
# updated_columns.append(f'Student Attendance [{initials}]')
|
67 |
-
# else:
|
68 |
-
# updated_columns.append(col)
|
69 |
-
# else:
|
70 |
-
# updated_columns.append(col)
|
71 |
-
# df.columns = updated_columns
|
72 |
-
# return df
|
73 |
-
|
74 |
def replace_student_names_with_initials(self, df):
|
75 |
updated_columns = []
|
76 |
for col in df.columns:
|
@@ -112,7 +97,7 @@ class DataProcessor:
|
|
112 |
def compute_intervention_statistics(self, df):
|
113 |
intervention_column = self.get_intervention_column(df)
|
114 |
total_days = len(df)
|
115 |
-
sessions_held = df[intervention_column].str.strip().str.lower().
|
116 |
intervention_frequency = (sessions_held / total_days) * 100 if total_days > 0 else 0
|
117 |
return pd.DataFrame({
|
118 |
'Intervention Dosage (%)': [round(intervention_frequency, 0)],
|
@@ -134,7 +119,7 @@ class DataProcessor:
|
|
134 |
|
135 |
def compute_student_metrics(self, df):
|
136 |
intervention_column = self.get_intervention_column(df)
|
137 |
-
intervention_df = df[df[intervention_column].str.strip().str.lower()
|
138 |
intervention_sessions_held = len(intervention_df)
|
139 |
student_columns = [col for col in df.columns if col.startswith('Student Attendance')]
|
140 |
|
@@ -151,7 +136,7 @@ class DataProcessor:
|
|
151 |
] else 0)
|
152 |
|
153 |
sessions_attended = attendance_values.sum()
|
154 |
-
attendance_pct = (sessions_attended / intervention_sessions_held
|
155 |
attendance_pct = round(attendance_pct)
|
156 |
|
157 |
engagement_counts = {
|
|
|
8 |
'Did the intervention happen today?',
|
9 |
'Did the intervention take place today?'
|
10 |
]
|
11 |
+
YES_RESPONSES = ['yes', 'assessment day'] # Added this line
|
12 |
ENGAGED_STR = 'Engaged'
|
13 |
PARTIALLY_ENGAGED_STR = 'Partially Engaged'
|
14 |
NOT_ENGAGED_STR = 'Not Engaged'
|
|
|
56 |
print(f"Error converting series to datetime: {e}")
|
57 |
return series
|
58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
def replace_student_names_with_initials(self, df):
|
60 |
updated_columns = []
|
61 |
for col in df.columns:
|
|
|
97 |
def compute_intervention_statistics(self, df):
|
98 |
intervention_column = self.get_intervention_column(df)
|
99 |
total_days = len(df)
|
100 |
+
sessions_held = df[intervention_column].str.strip().str.lower().isin(self.YES_RESPONSES).sum() # Modified line
|
101 |
intervention_frequency = (sessions_held / total_days) * 100 if total_days > 0 else 0
|
102 |
return pd.DataFrame({
|
103 |
'Intervention Dosage (%)': [round(intervention_frequency, 0)],
|
|
|
119 |
|
120 |
def compute_student_metrics(self, df):
|
121 |
intervention_column = self.get_intervention_column(df)
|
122 |
+
intervention_df = df[df[intervention_column].str.strip().str.lower().isin(self.YES_RESPONSES)] # Modified line
|
123 |
intervention_sessions_held = len(intervention_df)
|
124 |
student_columns = [col for col in df.columns if col.startswith('Student Attendance')]
|
125 |
|
|
|
136 |
] else 0)
|
137 |
|
138 |
sessions_attended = attendance_values.sum()
|
139 |
+
attendance_pct = (sessions_attended / intervention_sessions_held * 100) if intervention_sessions_held > 0 else 0
|
140 |
attendance_pct = round(attendance_pct)
|
141 |
|
142 |
engagement_counts = {
|