suryadev1 committed on
Commit
447391b
·
1 Parent(s): c580eb1

Added a downloadable CSV export function for the path analysis tool

Browse files
app.py CHANGED
@@ -11,6 +11,8 @@ import pandas as pd
11
  import plotly.graph_objects as go
12
  from sklearn.metrics import roc_auc_score
13
  from matplotlib.figure import Figure
 
 
14
  # Define the function to process the input file and model selection
15
 
16
  def process_file(model_name,inc_slider,progress=Progress(track_tqdm=True)):
@@ -85,25 +87,25 @@ def process_file(model_name,inc_slider,progress=Progress(track_tqdm=True)):
85
 
86
  selected_test_info = test_info.loc[indices]
87
 
88
- # First 20%
89
- first_20_percent_indices = selected_test_info.groupby(3).apply(
90
- lambda x: x.head(int(len(x) * 0.2))
91
- ).index.get_level_values(1).tolist()
92
 
93
- # Last 20%
94
- last_20_percent_indices = selected_test_info.groupby(3).apply(
95
- lambda x: x.tail(int(len(x) * 0.2))
96
- ).index.get_level_values(1).tolist()
97
 
98
- # Select the corresponding rows from the test file
99
- first_20_percent_rows = test.loc[first_20_percent_indices]
100
- last_20_percent_rows = test.loc[last_20_percent_indices]
101
 
102
- # Save the first 20% instances per student to a file
103
- first_20_percent_rows.to_csv('fileHandler/selected_rows_first20.txt', sep='\t', index=False, header=False, quoting=3, escapechar=' ')
104
 
105
- # Save the last 20% instances per student to a file
106
- last_20_percent_rows.to_csv('fileHandler/selected_rows_last20.txt', sep='\t', index=False, header=False, quoting=3, escapechar=' ')
107
 
108
  # select the graduation groups
109
  graduation_groups = [
@@ -125,6 +127,65 @@ def process_file(model_name,inc_slider,progress=Progress(track_tqdm=True)):
125
  "-b",str(1000)
126
  ])
127
  progress(0.5,desc="Model execution completed!! Now performing analysis on the results")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  with open("fileHandler/roc_data2.pkl", 'rb') as file:
129
  data = pickle.load(file)
130
  t_label=data[0]
@@ -622,11 +683,11 @@ def process_file(model_name,inc_slider,progress=Progress(track_tqdm=True)):
622
  # "-e",str(1),
623
  # "-b",str(1000)
624
  # ])
625
- with open("fileHandler/roc_data.pkl", "rb") as f:
626
- fpr, tpr, _ = pickle.load(f)
627
- # print(fpr,tpr)
628
- roc_auc_first_k = auc(fpr, tpr)
629
- print(roc_auc_first_k)
630
 
631
 
632
  progress(0.5,desc="last '%' sampling")
@@ -641,23 +702,23 @@ def process_file(model_name,inc_slider,progress=Progress(track_tqdm=True)):
641
  # "-e",str(1),
642
  # "-b",str(1000)
643
  # ])
644
- with open("fileHandler/roc_data.pkl", "rb") as f:
645
- fpr, tpr, _ = pickle.load(f)
646
- # print(fpr,tpr)
647
- roc_auc_last_k = auc(fpr, tpr)
648
- print(roc_auc_last_k)
649
 
650
 
651
 
652
- text_output_sampled_auc = f"""
653
- ---------------------------
654
- Model: {model_name}
655
- ---------------------------\n
656
 
657
- ROC score of first 20% of problems: {roc_auc_first_k:.4f}
658
- ROC score of last 20% of problems: {roc_auc_last_k:.4f}
659
 
660
- """
661
 
662
 
663
 
@@ -1030,6 +1091,19 @@ button, select, .slider-percentage {
1030
  }
1031
 
1032
  '''
 
 
 
 
 
 
 
 
 
 
 
 
 
1033
 
1034
  with gr.Blocks(theme='gstaff/sketch', css=custom_css) as demo:
1035
 
@@ -1077,9 +1151,16 @@ with gr.Blocks(theme='gstaff/sketch', css=custom_css) as demo:
1077
  # with gr.Row():
1078
  # output_text_sampled_auc = gr.Textbox(label="")
1079
 
 
 
 
 
 
 
 
 
 
1080
 
1081
-
1082
-
1083
  btn.click(
1084
  fn=process_file,
1085
  inputs=[model_dropdown,increment_slider],
 
11
  import plotly.graph_objects as go
12
  from sklearn.metrics import roc_auc_score
13
  from matplotlib.figure import Figure
14
+ import csv
15
+ # import os
16
  # Define the function to process the input file and model selection
17
 
18
  def process_file(model_name,inc_slider,progress=Progress(track_tqdm=True)):
 
87
 
88
  selected_test_info = test_info.loc[indices]
89
 
90
+ # # First 20%
91
+ # first_20_percent_indices = selected_test_info.groupby(3).apply(
92
+ # lambda x: x.head(int(len(x) * 0.2))
93
+ # ).index.get_level_values(1).tolist()
94
 
95
+ # # Last 20%
96
+ # last_20_percent_indices = selected_test_info.groupby(3).apply(
97
+ # lambda x: x.tail(int(len(x) * 0.2))
98
+ # ).index.get_level_values(1).tolist()
99
 
100
+ # # Select the corresponding rows from the test file
101
+ # first_20_percent_rows = test.loc[first_20_percent_indices]
102
+ # last_20_percent_rows = test.loc[last_20_percent_indices]
103
 
104
+ # # Save the first 20% instances per student to a file
105
+ # first_20_percent_rows.to_csv('fileHandler/selected_rows_first20.txt', sep='\t', index=False, header=False, quoting=3, escapechar=' ')
106
 
107
+ # # Save the last 20% instances per student to a file
108
+ # last_20_percent_rows.to_csv('fileHandler/selected_rows_last20.txt', sep='\t', index=False, header=False, quoting=3, escapechar=' ')
109
 
110
  # select the graduation groups
111
  graduation_groups = [
 
127
  "-b",str(1000)
128
  ])
129
  progress(0.5,desc="Model execution completed!! Now performing analysis on the results")
130
+
131
+ # Load tlb and plb
132
+ with open("fileHandler/tlabels_plabels.pkl", "rb") as f:
133
+ tlb, plb = pickle.load(f)
134
+
135
+ # Define function to filter and write CSV
136
def process_and_write_csv(filtered_data, filename):
    """Expand each input row into one CSV line per step and save the result.

    The output is written to ``fileHandler/<filename>`` with a fixed
    40-column header. Columns that have no source value are left empty.

    Args:
        filtered_data: pandas DataFrame. The first nine positional columns
            are unpacked as school, class, student id, status, problem, an
            ignored field, time zone, duration and a tab-separated string
            of steps. Each step is split on ``-`` into
            ``name[-action[-attempt[-...-outcome]]]``.
        filename: Name of the CSV file to create inside ``fileHandler/``.

    Raises:
        ValueError: If a row has fewer than nine columns.
        OSError: If the output file cannot be opened (for example when the
            ``fileHandler`` directory does not exist).
    """
    headers = [
        "Row", "Sample Name", "Transaction Id", "Anon Student Id", "Session Id", "Time Zone", "Duration (sec)",
        "Student Response Type", "Student Response Subtype", "Tutor Response Type", "Tutor Response Subtype",
        "Level (Workspace Id)", "Problem Name", "Problem View", "Problem Start Time", "Step Name",
        "Attempt At Step", "Is Last Attempt", "Outcome", "Selection", "Action", "Input", "Feedback Text",
        "Feedback Classification", "Help Level", "Total Num Hints", "KC (MATHia)", "KC Category (MATHia)",
        "KC (Single-KC)", "KC Category (Single-KC)", "KC (Unique-step)", "KC Category (Unique-step)",
        "School", "Class", "CF (Ruleid)", "CF (Semantic Event Id)", "CF (Skill New p-Known)",
        "CF (Skill Previous p-Known)", "CF (Workspace Progress Status)", "Event Type"
    ]

    with open("fileHandler/" + filename, 'w', newline='', encoding='utf-8') as outfile:
        # Writing by column name (missing keys default to "") guarantees that
        # every data row has exactly as many fields as the header. The
        # previous positional list was one field short ("Event Type").
        writer = csv.DictWriter(outfile, fieldnames=headers, restval="")
        writer.writeheader()

        row_num = 1
        # itertuples yields plain tuples, so all access below is positional.
        for record in filtered_data.itertuples(index=False, name=None):
            (school, class_id, student_id, _status, problem, _ignored,
             time_zone, duration, steps_data) = record[:9]

            # A missing steps cell (e.g. NaN) has nothing to expand; skip it
            # instead of failing on ``.split``.
            if not isinstance(steps_data, str):
                continue

            for step in steps_data.split('\t'):
                step_parts = step.split('-')
                step_name = step_parts[0]
                action = step_parts[1] if len(step_parts) > 1 else ""
                attempt = step_parts[2] if len(step_parts) > 2 else ""
                # The outcome is the last segment, and only when the step has
                # more than the three leading name/action/attempt segments.
                outcome = step_parts[-1] if len(step_parts) > 3 else ""

                writer.writerow({
                    "Row": row_num,
                    "Anon Student Id": student_id,
                    "Time Zone": time_zone,
                    "Duration (sec)": duration,
                    # The problem value fills both the workspace and the
                    # problem-name columns.
                    "Level (Workspace Id)": problem,
                    "Problem Name": problem,
                    "Step Name": step_name,
                    "Attempt At Step": attempt,
                    "Outcome": outcome,
                    "Action": action,
                    "School": school,
                    "Class": class_id,
                    "CF (Workspace Progress Status)": "PROMOTED",
                })
                row_num += 1

    print(f"CSV file '{filename}' created successfully.")
173
+
174
+ # Find indices where conditions match
175
+ for task_type in [0, 1]: # test_info[6] = 1 or 2
176
+ for label in [0, 1]: # tlb = plb = 0 or 1
177
+ matching_indices = [i for i in range(len(tlb)) if tlb[i] == plb[i] == label]
178
+
179
+ # Filter the data
180
+ filtered_data = selected_test_info.iloc[matching_indices]
181
+ filtered_data = filtered_data[filtered_data[6] == task_type] # Ensure test_info[6] matches
182
+
183
+ # Define filename dynamically
184
+ filename = f"output_task{task_type}_label{label}.csv"
185
+
186
+ # Write to CSV
187
+ process_and_write_csv(filtered_data, filename)
188
+
189
  with open("fileHandler/roc_data2.pkl", 'rb') as file:
190
  data = pickle.load(file)
191
  t_label=data[0]
 
683
  # "-e",str(1),
684
  # "-b",str(1000)
685
  # ])
686
+ # with open("fileHandler/roc_data.pkl", "rb") as f:
687
+ # fpr, tpr, _ = pickle.load(f)
688
+ # # print(fpr,tpr)
689
+ # roc_auc_first_k = auc(fpr, tpr)
690
+ # print(roc_auc_first_k)
691
 
692
 
693
  progress(0.5,desc="last '%' sampling")
 
702
  # "-e",str(1),
703
  # "-b",str(1000)
704
  # ])
705
+ # with open("fileHandler/roc_data.pkl", "rb") as f:
706
+ # fpr, tpr, _ = pickle.load(f)
707
+ # # print(fpr,tpr)
708
+ # roc_auc_last_k = auc(fpr, tpr)
709
+ # print(roc_auc_last_k)
710
 
711
 
712
 
713
+ # text_output_sampled_auc = f"""
714
+ # ---------------------------
715
+ # Model: {model_name}
716
+ # ---------------------------\n
717
 
718
+ # ROC score of first 20% of problems: {roc_auc_first_k:.4f}
719
+ # ROC score of last 20% of problems: {roc_auc_last_k:.4f}
720
 
721
+ # """
722
 
723
 
724
 
 
1091
  }
1092
 
1093
  '''
1094
+ # Define the file directory
1095
+ FILE_DIR = "fileHandler"
1096
+
1097
+ # Function to get list of files
1098
def list_files():
    """Return the names of the four downloadable CSV exports.

    The names follow ``output_task<T>_label<L>.csv`` for T and L in {0, 1},
    ordered by task first and label second.
    """
    return [
        f"output_task{task}_label{label}.csv"
        for task in (0, 1)
        for label in (0, 1)
    ]
1100
+ # return [f for f in os.listdir(FILE_DIR) if os.path.isfile(os.path.join(FILE_DIR, f))]
1101
+
1102
+ # Function to provide the selected file path
1103
def provide_file_path(file_name):
    """Map a selected file name to its path under ``FILE_DIR``.

    Returns ``None`` when no file name was selected (empty or ``None``);
    otherwise returns ``"<FILE_DIR>/<file_name>"``.
    """
    # Nothing selected: there is no path to hand back.
    if not file_name:
        return None
    return f"{FILE_DIR}/{file_name}"
1105
+ # file_path = os.path.join(FILE_DIR, file_name)
1106
+ # return file_path
1107
 
1108
  with gr.Blocks(theme='gstaff/sketch', css=custom_css) as demo:
1109
 
 
1151
  # with gr.Row():
1152
  # output_text_sampled_auc = gr.Textbox(label="")
1153
 
1154
+ with gr.Row():
1155
+ file_dropdown = gr.Dropdown(choices=list_files(), label="Generate File")
1156
+ download_button = gr.Button("Generate files")
1157
+
1158
+ download_button.click(
1159
+ fn=provide_file_path,
1160
+ inputs=[file_dropdown],
1161
+ outputs=[gr.File(label="Your Download is ready, click on the right side to download")]
1162
+ )
1163
 
 
 
1164
  btn.click(
1165
  fn=process_file,
1166
  inputs=[model_dropdown,increment_slider],
fileHandler/output_task0_label0.csv ADDED
The diff for this file is too large to render. See raw diff
 
fileHandler/output_task0_label1.csv ADDED
The diff for this file is too large to render. See raw diff
 
fileHandler/output_task1_label0.csv ADDED
The diff for this file is too large to render. See raw diff
 
fileHandler/output_task1_label1.csv ADDED
The diff for this file is too large to render. See raw diff
 
fileHandler/result.txt CHANGED
@@ -3,5 +3,5 @@ total_acc: 69.00702106318957
3
  precisions: 0.7236623191454734
4
  recalls: 0.6900702106318957
5
  f1_scores: 0.6802420656474512
6
- time_taken_from_start: 20.852919101715088
7
  auc_score: 0.7457100293916334
 
3
  precisions: 0.7236623191454734
4
  recalls: 0.6900702106318957
5
  f1_scores: 0.6802420656474512
6
+ time_taken_from_start: 36.14206862449646
7
  auc_score: 0.7457100293916334
fileHandler/tlabels_plabels.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1aabcfeb64b7645738d0507dd755822b92f2a256a2f0bdee28b2916268078eb
3
+ size 37993
new_test_saved_finetuned_model.py CHANGED
@@ -226,7 +226,9 @@ class BERTFineTuneTrainer:
226
  with open("fileHandler/roc_data.pkl", "wb") as f:
227
  pickle.dump((fpr, tpr, thresholds), f)
228
  with open("fileHandler/roc_data2.pkl", "wb") as f:
229
- pickle.dump((tlabels,positive_class_probs), f)
 
 
230
  print(final_msg)
231
  f.close()
232
  with open(self.log_folder_path+f"/log_{phase}_finetuned_info.txt", 'a') as f1:
 
226
  with open("fileHandler/roc_data.pkl", "wb") as f:
227
  pickle.dump((fpr, tpr, thresholds), f)
228
  with open("fileHandler/roc_data2.pkl", "wb") as f:
229
+ pickle.dump((tlabels,positive_class_probs), f)
230
+ with open("fileHandler/tlabels_plabels.pkl", "wb") as f:
231
+ pickle.dump((tlabels,plabels), f)
232
  print(final_msg)
233
  f.close()
234
  with open(self.log_folder_path+f"/log_{phase}_finetuned_info.txt", 'a') as f1: