Spaces:

suryadev1
/

astra

Sleeping

App Files Files Community

suryadev1 committed on Mar 31

Commit

447391b

1 Parent(s): c580eb1

Added a downloadable CSV export function for the path analysis tool

Browse files

Files changed (8) hide show

app.py +115 -34
fileHandler/output_task0_label0.csv +0 -0
fileHandler/output_task0_label1.csv +0 -0
fileHandler/output_task1_label0.csv +0 -0
fileHandler/output_task1_label1.csv +0 -0
fileHandler/result.txt +1 -1
fileHandler/tlabels_plabels.pkl +3 -0
new_test_saved_finetuned_model.py +3 -1

app.py CHANGED Viewed

@@ -11,6 +11,8 @@ import pandas as pd
 import plotly.graph_objects as go
 from sklearn.metrics import roc_auc_score
 from matplotlib.figure import Figure
 # Define the function to process the input file and model selection
 def process_file(model_name,inc_slider,progress=Progress(track_tqdm=True)):
@@ -85,25 +87,25 @@ def process_file(model_name,inc_slider,progress=Progress(track_tqdm=True)):
     selected_test_info = test_info.loc[indices]
-    # First 20%
-    first_20_percent_indices = selected_test_info.groupby(3).apply(
-        lambda x: x.head(int(len(x) * 0.2))
-    ).index.get_level_values(1).tolist()
-    # Last 20%
-    last_20_percent_indices = selected_test_info.groupby(3).apply(
-        lambda x: x.tail(int(len(x) * 0.2))
-    ).index.get_level_values(1).tolist()
-    # Select the corresponding rows from the test file
-    first_20_percent_rows = test.loc[first_20_percent_indices]
-    last_20_percent_rows = test.loc[last_20_percent_indices]
-    # Save the first 20% instances per student to a file
-    first_20_percent_rows.to_csv('fileHandler/selected_rows_first20.txt', sep='\t', index=False, header=False, quoting=3, escapechar=' ')
-    # Save the last 20% instances per student to a file
-    last_20_percent_rows.to_csv('fileHandler/selected_rows_last20.txt', sep='\t', index=False, header=False, quoting=3, escapechar=' ')
     # select the graduation groups
     graduation_groups = [
@@ -125,6 +127,65 @@ def process_file(model_name,inc_slider,progress=Progress(track_tqdm=True)):
         "-b",str(1000)
     ])
     progress(0.5,desc="Model execution completed!! Now performing analysis on the results")
     with open("fileHandler/roc_data2.pkl", 'rb') as file:
         data = pickle.load(file)
     t_label=data[0]
@@ -622,11 +683,11 @@ def process_file(model_name,inc_slider,progress=Progress(track_tqdm=True)):
 #     "-e",str(1),
 #     "-b",str(1000)
 # ])
-    with open("fileHandler/roc_data.pkl", "rb") as f:
-        fpr, tpr, _ = pickle.load(f)
-    # print(fpr,tpr)
-    roc_auc_first_k = auc(fpr, tpr)
-    print(roc_auc_first_k)
     progress(0.5,desc="last '%' sampling")
@@ -641,23 +702,23 @@ def process_file(model_name,inc_slider,progress=Progress(track_tqdm=True)):
 #     "-e",str(1),
 #     "-b",str(1000)
 # ])
-    with open("fileHandler/roc_data.pkl", "rb") as f:
-        fpr, tpr, _ = pickle.load(f)
-    # print(fpr,tpr)
-    roc_auc_last_k = auc(fpr, tpr)
-    print(roc_auc_last_k)
-    text_output_sampled_auc = f"""
-        ---------------------------
-        Model: {model_name}
-        ---------------------------\n
-        ROC score of first 20% of problems: {roc_auc_first_k:.4f}
-        ROC score of last 20% of problems: {roc_auc_last_k:.4f}
-    """
@@ -1030,6 +1091,19 @@ button, select, .slider-percentage {
 }
 '''
 with gr.Blocks(theme='gstaff/sketch', css=custom_css) as demo:
@@ -1077,9 +1151,16 @@ with gr.Blocks(theme='gstaff/sketch', css=custom_css) as demo:
     # with gr.Row():
     #     output_text_sampled_auc = gr.Textbox(label="")
     btn.click(
         fn=process_file,
         inputs=[model_dropdown,increment_slider],

 import plotly.graph_objects as go
 from sklearn.metrics import roc_auc_score
 from matplotlib.figure import Figure
+import csv
+# import os
 # Define the function to process the input file and model selection
 def process_file(model_name,inc_slider,progress=Progress(track_tqdm=True)):
     selected_test_info = test_info.loc[indices]
+    # # First 20%
+    # first_20_percent_indices = selected_test_info.groupby(3).apply(
+    #     lambda x: x.head(int(len(x) * 0.2))
+    # ).index.get_level_values(1).tolist()
+    # # Last 20%
+    # last_20_percent_indices = selected_test_info.groupby(3).apply(
+    #     lambda x: x.tail(int(len(x) * 0.2))
+    # ).index.get_level_values(1).tolist()
+    # # Select the corresponding rows from the test file
+    # first_20_percent_rows = test.loc[first_20_percent_indices]
+    # last_20_percent_rows = test.loc[last_20_percent_indices]
+    # # Save the first 20% instances per student to a file
+    # first_20_percent_rows.to_csv('fileHandler/selected_rows_first20.txt', sep='\t', index=False, header=False, quoting=3, escapechar=' ')
+    # # Save the last 20% instances per student to a file
+    # last_20_percent_rows.to_csv('fileHandler/selected_rows_last20.txt', sep='\t', index=False, header=False, quoting=3, escapechar=' ')
     # select the graduation groups
     graduation_groups = [
         "-b",str(1000)
     ])
     progress(0.5,desc="Model execution completed!! Now performing analysis on the results")
+    # Load the two-element tuple stored in tlabels_plabels.pkl; presumably true labels (tlb) and predicted labels (plb) — confirm against the writer.
+    with open("fileHandler/tlabels_plabels.pkl", "rb") as f:
+        tlb, plb = pickle.load(f)  # NOTE(review): pickle.load can run arbitrary code from the file; safe only if this file is always produced locally — confirm
+    # Write fileHandler/<filename> as CSV: one header row, then one row per tab-separated step of every record in filtered_data (no filtering happens here).
+    def process_and_write_csv(filtered_data, filename):  # filtered_data: object exposing .iterrows(); filename: file name created under fileHandler/
+        headers = [  # fixed column titles written as the first CSV row
+            "Row", "Sample Name", "Transaction Id", "Anon Student Id", "Session Id", "Time Zone", "Duration (sec)",
+            "Student Response Type", "Student Response Subtype", "Tutor Response Type", "Tutor Response Subtype",
+            "Level (Workspace Id)", "Problem Name", "Problem View", "Problem Start Time", "Step Name",
+            "Attempt At Step", "Is Last Attempt", "Outcome", "Selection", "Action", "Input", "Feedback Text",
+            "Feedback Classification", "Help Level", "Total Num Hints", "KC (MATHia)", "KC Category (MATHia)",
+            "KC (Single-KC)", "KC Category (Single-KC)", "KC (Unique-step)", "KC Category (Unique-step)",
+            "School", "Class", "CF (Ruleid)", "CF (Semantic Event Id)", "CF (Skill New p-Known)",
+            "CF (Skill Previous p-Known)", "CF (Workspace Progress Status)", "Event Type"
+        ]
+        with open("fileHandler/" + filename, 'w', newline='') as outfile:  # newline='' leaves line-ending handling to csv.writer
+            writer = csv.writer(outfile)
+            writer.writerow(headers)
+            row_num = 1  # running value for the "Row" column, counted across all records
+            for _, row in filtered_data.iterrows():
+                school, class_id, student_id, status, problem, _, time_zone, duration, attempts = row[:9]  # unpack the first nine fields; status and attempts are not used below
+                steps_data = row[8]  # NOTE(review): presumably the same field as `attempts` above — confirm
+                for step in steps_data.split('\t'):  # steps are tab-separated inside the field
+                    step_parts = step.split('-')  # each step is hyphen-separated: name, then optional action, attempt, ..., outcome
+                    step_name = step_parts[0]
+                    action = step_parts[1] if len(step_parts) > 1 else ""
+                    attempt = step_parts[2] if len(step_parts) > 2 else ""
+                    outcome = step_parts[-1] if len(step_parts) > 3 else ""  # last part counts as the outcome only when there are at least four parts
+                    row_data = [  # empty strings fill header columns for which no value is available
+                        row_num, "", "", student_id, "", time_zone, duration, "", "", "", "",
+                        problem, problem, "", "", step_name, attempt, "", outcome, "", action, "", "", "", "", "", "", "", "", "", "","",
+                        school, class_id, "", "", "", "", "PROMOTED"  # NOTE(review): row looks one field shorter than headers (no "Event Type" value) — confirm
+                    ]
+                    writer.writerow(row_data)
+                    row_num += 1
+        print(f"CSV file '{filename}' created successfully.")
+    # For each (task_type, label) pair, collect positions where tlb and plb both equal label, then write the matching rows to their own CSV.
+    for task_type in [0, 1]:  # NOTE(review): the earlier comment said test_info[6] is 1 or 2, yet 0 and 1 are compared below — confirm the encoding
+        for label in [0, 1]:  # value that both tlb[i] and plb[i] must equal
+            matching_indices = [i for i in range(len(tlb)) if tlb[i] == plb[i] == label]
+            # Select rows by position; assumes tlb/plb are aligned row-for-row with selected_test_info — TODO confirm
+            filtered_data = selected_test_info.iloc[matching_indices]
+            filtered_data = filtered_data[filtered_data[6] == task_type]  # keep only rows whose column 6 equals task_type
+            # Build the output file name from the current task_type and label
+            filename = f"output_task{task_type}_label{label}.csv"
+            # Write the selected rows under fileHandler/
+            process_and_write_csv(filtered_data, filename)
     with open("fileHandler/roc_data2.pkl", 'rb') as file:
         data = pickle.load(file)
     t_label=data[0]
 #     "-e",str(1),
 #     "-b",str(1000)
 # ])
+    # with open("fileHandler/roc_data.pkl", "rb") as f:
+    #     fpr, tpr, _ = pickle.load(f)
+    # # print(fpr,tpr)
+    # roc_auc_first_k = auc(fpr, tpr)
+    # print(roc_auc_first_k)
     progress(0.5,desc="last '%' sampling")
 #     "-e",str(1),
 #     "-b",str(1000)
 # ])
+    # with open("fileHandler/roc_data.pkl", "rb") as f:
+    #     fpr, tpr, _ = pickle.load(f)
+    # # print(fpr,tpr)
+    # roc_auc_last_k = auc(fpr, tpr)
+    # print(roc_auc_last_k)
+    # text_output_sampled_auc = f"""
+    #     ---------------------------
+    #     Model: {model_name}
+    #     ---------------------------\n
+    #     ROC score of first 20% of problems: {roc_auc_first_k:.4f}
+    #     ROC score of last 20% of problems: {roc_auc_last_k:.4f}
+    # """
 }
 '''
+# Define the file directory
+FILE_DIR = "fileHandler"
+# Return the names of the four per-task/per-label CSV files as a fixed list (the directory is not scanned).
+def list_files():
+    return ['output_task0_label0.csv', 'output_task0_label1.csv', 'output_task1_label0.csv', 'output_task1_label1.csv']
+    # return [f for f in os.listdir(FILE_DIR) if os.path.isfile(os.path.join(FILE_DIR, f))]
+# Map a selected file name to "<FILE_DIR>/<file_name>"; returns None when file_name is falsy (nothing selected).
+def provide_file_path(file_name):
+    return f"{FILE_DIR}/{file_name}" if file_name else None  # does not check that the file exists
+    # file_path = os.path.join(FILE_DIR, file_name)
+    # return file_path
 with gr.Blocks(theme='gstaff/sketch', css=custom_css) as demo:
     # with gr.Row():
     #     output_text_sampled_auc = gr.Textbox(label="")
+    with gr.Row():  # dropdown and button are created inside the same gr.Row
+        file_dropdown = gr.Dropdown(choices=list_files(), label="Generate File")  # choices come from list_files()
+        download_button = gr.Button("Generate files")
+    download_button.click(  # on click, pass the dropdown value to provide_file_path and send its result to a newly created gr.File output
+        fn=provide_file_path,
+        inputs=[file_dropdown],
+        outputs=[gr.File(label="Your Download is ready, click on the right side to download")]
+    )
     btn.click(
         fn=process_file,
         inputs=[model_dropdown,increment_slider],

fileHandler/output_task0_label0.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

fileHandler/output_task0_label1.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

fileHandler/output_task1_label0.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

fileHandler/output_task1_label1.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

fileHandler/result.txt CHANGED Viewed

@@ -3,5 +3,5 @@ total_acc: 69.00702106318957
 precisions: 0.7236623191454734
 recalls: 0.6900702106318957
 f1_scores: 0.6802420656474512
-time_taken_from_start: 20.852919101715088
 auc_score: 0.7457100293916334

 precisions: 0.7236623191454734
 recalls: 0.6900702106318957
 f1_scores: 0.6802420656474512
+time_taken_from_start: 36.14206862449646
 auc_score: 0.7457100293916334

fileHandler/tlabels_plabels.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c1aabcfeb64b7645738d0507dd755822b92f2a256a2f0bdee28b2916268078eb
+size 37993

new_test_saved_finetuned_model.py CHANGED Viewed

@@ -226,7 +226,9 @@ class BERTFineTuneTrainer:
             with open("fileHandler/roc_data.pkl", "wb") as f:
                 pickle.dump((fpr, tpr, thresholds), f)
             with open("fileHandler/roc_data2.pkl", "wb") as f:
-                pickle.dump((tlabels,positive_class_probs), f)
             print(final_msg)
             f.close()
             with open(self.log_folder_path+f"/log_{phase}_finetuned_info.txt", 'a') as f1:

             with open("fileHandler/roc_data.pkl", "wb") as f:
                 pickle.dump((fpr, tpr, thresholds), f)
             with open("fileHandler/roc_data2.pkl", "wb") as f:
+                pickle.dump((tlabels,positive_class_probs), f)
+            with open("fileHandler/tlabels_plabels.pkl", "wb") as f:
+                pickle.dump((tlabels,plabels), f)
             print(final_msg)
             f.close()
             with open(self.log_folder_path+f"/log_{phase}_finetuned_info.txt", 'a') as f1: