Spaces:
Sleeping
Sleeping
# !pip install streamlit
# !pip install pandas
import pandas as pd | |
import streamlit as st | |
import base64 | |
import io | |
import base64 | |
# Functions
def map_data_to_template(mapping_df, template_df, data_df):
    """Build a template-shaped DataFrame from source data via a variable mapping.

    For every column of ``template_df`` the candidate source columns are the
    ``'MEDLab Variable'`` entries of ``mapping_df`` whose ``'NDA Variable'``
    equals that column name, tried in mapping-row order. The first candidate
    that exists in ``data_df`` and holds at least one non-null value supplies
    the output column. Template columns with no usable source are filled
    with ``None``.

    Args:
        mapping_df: DataFrame with the columns ``'MEDLab Variable'`` (source
            column name) and ``'NDA Variable'`` (template column name). The
            same source may be listed for several template columns; rows
            with a missing value in either column are ignored.
        template_df: DataFrame whose column labels (and their order) define
            the output columns. Its rows are not used.
        data_df: DataFrame holding the source data. Values are copied by row
            position, so its index labels do not matter.

    Returns:
        A new DataFrame with ``template_df``'s columns and one row per row of
        ``data_df``, indexed 0..n-1. Values taken from a source column whose
        name contains ``'date'`` are parsed with ``pd.to_datetime`` and
        rendered as ``MM/DD/YYYY`` strings; unparseable values become missing.

    Raises:
        KeyError: If ``mapping_df`` lacks either required column.
    """
    # Group candidate sources per template column once, instead of rescanning
    # the whole mapping for every template column. Grouping by row (rather
    # than building a {source: target} dict) keeps a source that is mapped to
    # more than one template column.
    sources_by_target = {}
    mapping_rows = zip(mapping_df['MEDLab Variable'], mapping_df['NDA Variable'])
    for medlab_var, nda_var in mapping_rows:
        if pd.isna(medlab_var) or pd.isna(nda_var):
            continue
        sources_by_target.setdefault(nda_var, []).append(medlab_var)

    row_count = len(data_df)
    output_columns = {}
    for nda_var in template_df.columns:
        # Default for template columns that end up without a usable source.
        column_values = [None] * row_count
        for medlab_var in sources_by_target.get(nda_var, ()):
            if medlab_var not in data_df.columns:
                continue
            source = data_df[medlab_var]
            if source.isnull().all():
                continue
            # Only string labels can be tested for the 'date' marker.
            if isinstance(medlab_var, str) and 'date' in medlab_var:
                source = pd.to_datetime(source, errors='coerce').dt.strftime('%m/%d/%Y')
            # Drop the source index so values are placed by position; aligning
            # on labels would yield NaN for any non-default data_df index.
            column_values = source.reset_index(drop=True)
            break  # The first usable source wins.
        output_columns[nda_var] = column_values

    # Assemble the frame in one step rather than inserting column by column.
    return pd.DataFrame(output_columns, columns=template_df.columns)
# Streamlit app
def _read_uploaded_csv(uploaded_file, label, **read_csv_kwargs):
    """Parse an uploaded file as UTF-8 CSV, reporting failures in the UI.

    Args:
        uploaded_file: Object returned by ``st.file_uploader``; only its
            ``getvalue()`` method is used, and its result is decoded as UTF-8.
        label: Human-readable name of the file, used in error messages.
        **read_csv_kwargs: Extra keyword arguments forwarded to
            ``pd.read_csv``.

    Returns:
        The parsed DataFrame, or ``None`` when the file could not be decoded
        or parsed (an error has then already been shown via ``st.error``).
    """
    try:
        text = uploaded_file.getvalue().decode('utf-8')
        return pd.read_csv(io.StringIO(text), **read_csv_kwargs)
    except UnicodeDecodeError:
        st.error(f"{label} is not valid UTF-8 text. Please re-save it as a UTF-8 CSV.")
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        st.error(f"Could not parse {label}: {exc}")
    return None


def main():
    """Render the Data Transformer page.

    Shows three CSV uploaders (NIMH template, REDCap data, conversion key).
    Once all three files are provided they are parsed, the data is mapped
    onto the template with ``map_data_to_template``, and the result is
    displayed together with a link that downloads it as
    ``transformed_data.csv``. Unreadable files and a conversion key without
    the required columns are reported with ``st.error`` instead of raising.
    """
    st.markdown("<h1 style='text-align: center; color: #E694FF;'>Data Transformer</h1>", unsafe_allow_html=True)

    # File uploader for each CSV
    st.subheader("Upload Files")
    nimh_template_file = st.file_uploader("Choose NIMH Template CSV", type=['csv'])
    redcap_data_file = st.file_uploader("Choose REDCap Data CSV", type=['csv'])
    conversion_key_file = st.file_uploader("Choose Conversion Key CSV", type=['csv'])

    # Nothing to do until every file has been provided.
    if not (nimh_template_file and redcap_data_file and conversion_key_file):
        return

    # The template's first line is skipped so that its second line is used
    # as the header row.
    nimh_template_df = _read_uploaded_csv(nimh_template_file, "NIMH Template CSV", skiprows=1)
    redcap_data_df = _read_uploaded_csv(redcap_data_file, "REDCap Data CSV")
    conversion_key_df = _read_uploaded_csv(conversion_key_file, "Conversion Key CSV")
    # All three files are read before bailing out so every problem is shown.
    if nimh_template_df is None or redcap_data_df is None or conversion_key_df is None:
        return

    # map_data_to_template indexes these two columns; check them up front so
    # the user gets a readable message rather than a KeyError traceback.
    missing_columns = [
        column
        for column in ('MEDLab Variable', 'NDA Variable')
        if column not in conversion_key_df.columns
    ]
    if missing_columns:
        st.error(
            "Conversion Key CSV is missing required column(s): "
            + ", ".join(missing_columns)
        )
        return

    transformed_data_df = map_data_to_template(
        conversion_key_df,
        nimh_template_df,
        redcap_data_df,
    )

    # Display transformed data
    st.subheader("Transformed Data")
    st.write(transformed_data_df)

    # Download link for transformed data
    st.subheader("Download Transformed Data")
    csv_text = transformed_data_df.to_csv(index=False)
    # base64 works on bytes, while the data URI needs text: str -> bytes -> str.
    b64 = base64.b64encode(csv_text.encode()).decode()
    href = f'<a href="data:file/csv;base64,{b64}" download="transformed_data.csv">Download CSV File</a>'
    st.markdown(href, unsafe_allow_html=True)
# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()