Akankshg committed on
Commit
ae5bdaf
·
verified ·
1 Parent(s): 0325edd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -34
app.py CHANGED
@@ -1019,43 +1019,43 @@ if analysis_option == 'Machine Learning':
1019
  token = os.environ["HUGGING_FACE_HUB_TOKEN"]
1020
  local_file_1 = hf_hub_download(repo_id=repo_id, filename=filename_1,repo_type="dataset", token=token)
1021
  filtered_data = pd.read_parquet(local_file_1)
1022
- if filtered_data['key_lab2'].notna().any():
1023
- column_list = ['PatientID', 'VisitID', 'GroupedICD'] + list(filtered_data['key_lab2'].iloc[0])
1024
- pivot_data = pd.pivot_table(filtered_data, values='ComponentValue', index=['PatientID', 'VisitID', 'GroupedICD'], columns='ComponentName', aggfunc=lambda x: ', '.join(map(str, x)))
1025
- pivot_data = pivot_data.reset_index(drop=False)
1026
- pivot_data = pivot_data[column_list].copy()
1027
- filtered_data = pd.merge(filtered_data, pivot_data, on=['PatientID', 'VisitID','GroupedICD'], how='left')
1028
 
1029
- filtered_data.iloc[:, -20:] = filtered_data.iloc[:, -20:].convert_dtypes()
1030
- hmm = pd.DataFrame()
1031
- # num_columns = 20
1032
- num_columns = len(list(filtered_data['key_lab2'].iloc[0]))
1033
- for i in range(1, num_columns+1):
1034
- existing_column = filtered_data.columns[-i]
1035
- new_column_name = f'{existing_column}_meanvalue'
1036
- hmm[new_column_name] = filtered_data[existing_column].apply(mean_of_values)
1037
- filtered_data = pd.concat([filtered_data, hmm], axis=1)
1038
- column_list = [
1039
- ## Necessary columns
1040
- 'PatientID', 'VisitID', 'GroupedICD',
1041
 
1042
- ## Numerical values
1043
- 'Age', 'SystolicBP',
1044
- 'DiastolicBP','Temperature',
1045
- 'Pulse', 'Weight', 'Height', 'BMI', 'Respiration',
1046
- 'SPO2', 'PHQ_9Score',
1047
- # 'PeakFlow'
1048
 
1049
- ## Categorial Values
1050
- 'LegalSex','BPLocation', 'BPPosition', 'PregnancyStatus', 'LactationStatus', 'TemperatureSource',
1051
- 'Age_Category','BP Severity','Depression Severity','weight_condition', 'Temp_condition', 'Pulse_condition',
1052
- 'Respiration_condition', 'SPO2_condition', 'PeakF_condition']
1053
- # last = list(filtered_data.columns[-20:])
1054
- last = list(hmm.columns)
1055
- required_columns = column_list + last
1056
- filtered_data = filtered_data[required_columns].copy()
1057
- filtered_data = filtered_data.drop_duplicates().reset_index(drop=True)
1058
- filtered_data = filtered_data.dropna(axis=1, how='all')
1059
  imputer(filtered_data)
1060
 
1061
 
 
1019
  token = os.environ["HUGGING_FACE_HUB_TOKEN"]
1020
  local_file_1 = hf_hub_download(repo_id=repo_id, filename=filename_1,repo_type="dataset", token=token)
1021
  filtered_data = pd.read_parquet(local_file_1)
1022
+ # if filtered_data['key_lab2'].notna().any():
1023
+ # column_list = ['PatientID', 'VisitID', 'GroupedICD'] + list(filtered_data['key_lab2'].iloc[0])
1024
+ # pivot_data = pd.pivot_table(filtered_data, values='ComponentValue', index=['PatientID', 'VisitID', 'GroupedICD'], columns='ComponentName', aggfunc=lambda x: ', '.join(map(str, x)))
1025
+ # pivot_data = pivot_data.reset_index(drop=False)
1026
+ # pivot_data = pivot_data[column_list].copy()
1027
+ # filtered_data = pd.merge(filtered_data, pivot_data, on=['PatientID', 'VisitID','GroupedICD'], how='left')
1028
 
1029
+ # filtered_data.iloc[:, -20:] = filtered_data.iloc[:, -20:].convert_dtypes()
1030
+ # hmm = pd.DataFrame()
1031
+ # # num_columns = 20
1032
+ # num_columns = len(list(filtered_data['key_lab2'].iloc[0]))
1033
+ # for i in range(1, num_columns+1):
1034
+ # existing_column = filtered_data.columns[-i]
1035
+ # new_column_name = f'{existing_column}_meanvalue'
1036
+ # hmm[new_column_name] = filtered_data[existing_column].apply(mean_of_values)
1037
+ # filtered_data = pd.concat([filtered_data, hmm], axis=1)
1038
+ # column_list = [
1039
+ # ## Necessary columns
1040
+ # 'PatientID', 'VisitID', 'GroupedICD',
1041
 
1042
+ # ## Numerical values
1043
+ # 'Age', 'SystolicBP',
1044
+ # 'DiastolicBP','Temperature',
1045
+ # 'Pulse', 'Weight', 'Height', 'BMI', 'Respiration',
1046
+ # 'SPO2', 'PHQ_9Score',
1047
+ # # 'PeakFlow'
1048
 
1049
+ # ## Categorial Values
1050
+ # 'LegalSex','BPLocation', 'BPPosition', 'PregnancyStatus', 'LactationStatus', 'TemperatureSource',
1051
+ # 'Age_Category','BP Severity','Depression Severity','weight_condition', 'Temp_condition', 'Pulse_condition',
1052
+ # 'Respiration_condition', 'SPO2_condition', 'PeakF_condition']
1053
+ # # last = list(filtered_data.columns[-20:])
1054
+ # last = list(hmm.columns)
1055
+ # required_columns = column_list + last
1056
+ # filtered_data = filtered_data[required_columns].copy()
1057
+ # filtered_data = filtered_data.drop_duplicates().reset_index(drop=True)
1058
+ # filtered_data = filtered_data.dropna(axis=1, how='all')
1059
  imputer(filtered_data)
1060
 
1061