"""Streamlit app that merges two CSV-backed DataFrames with LinkTransformer."""

import streamlit as st
import pandas as pd
import linktransformer as lt


# Function to convert DataFrame to CSV for download
def convert_df_to_csv(df):
    """Return ``df`` serialized as CSV text and encoded to UTF-8 bytes.

    The bytes are what ``st.download_button`` is given as its ``data``.
    """
    return df.to_csv().encode('utf-8')


# Function to load DataFrame
def load_dataframe(upload, path):
    """Read a CSV from an uploaded file or, failing that, from a path.

    Args:
        upload: Value returned by ``st.file_uploader`` (``None`` when no
            file has been uploaded).
        path: Text entered by the user; ignored when ``upload`` is given
            or when it is empty.

    Returns:
        The parsed DataFrame, or ``None`` when neither source was provided.
    """
    if upload is not None:
        return pd.read_csv(upload)
    if path:
        return pd.read_csv(path)
    return None


def _load_or_report(upload, path, label):
    """Load a DataFrame, showing a Streamlit error instead of crashing.

    Returns ``None`` when nothing was provided or the CSV could not be read.
    """
    try:
        return load_dataframe(upload, path)
    except (OSError, ValueError) as exc:
        # OSError covers missing/unreadable paths; pandas' parser and
        # empty-data errors (and decode errors) are ValueError subclasses.
        st.error(f"Could not read {label}: {exc}")
        return None


st.title('DataFrame Merger using LinkTransformer')

# Options for DataFrame 1
df1_upload = st.file_uploader("Upload DataFrame 1 (CSV)", type=['csv'], key='df1_upload')
df1_path = st.text_input("...or enter path for DataFrame 1 (CSV)", key='df1_path')

# Options for DataFrame 2
df2_upload = st.file_uploader("Upload DataFrame 2 (CSV)", type=['csv'], key='df2_upload')
df2_path = st.text_input("...or enter path for DataFrame 2 (CSV)", key='df2_path')

# Load and display the DataFrames
df1 = _load_or_report(df1_upload, df1_path, "DataFrame 1")
df2 = _load_or_report(df2_upload, df2_path, "DataFrame 2")

if df1 is not None:
    st.write("DataFrame 1 Preview:")
    st.dataframe(df1.head())

if df2 is not None:
    st.write("DataFrame 2 Preview:")
    st.dataframe(df2.head())

# Model selection
model_path = st.text_input("Model path (HuggingFace or local)", value="all-MiniLM-L6-v2")

# Checkbox for columns to match on.
# Both frames are None until the user supplies them, so check for None before
# touching ``.empty`` (otherwise the first page load raises AttributeError).
both_loaded = df1 is not None and df2 is not None
if both_loaded and not df1.empty and not df2.empty:
    columns_df1 = df1.columns.tolist()
    columns_df2 = df2.columns.tolist()
    selected_columns_df1 = st.multiselect(
        "Select columns from DataFrame 1 to match on:",
        columns_df1,
        default=[columns_df1[0]],
    )
    selected_columns_df2 = st.multiselect(
        "Select columns from DataFrame 2 to match on:",
        columns_df2,
        default=[columns_df2[0]],
    )

    # Perform merge
    if st.button("Merge DataFrames"):
        if not selected_columns_df1 or not selected_columns_df2:
            st.warning("Select at least one column from each DataFrame to match on.")
        else:
            model = lt.LinkTransformer(model_path)
            # df2 is passed as the left frame and df1 as the right frame, so
            # the key columns must follow the same order: left_on names
            # df2's columns and right_on names df1's columns.
            df_lm_matched = lt.merge(
                df2,
                df1,
                merge_type='1:m',
                on=None,
                model=model,
                left_on=selected_columns_df2,
                right_on=selected_columns_df1,
            )
            st.write("Merged DataFrame Preview:")
            st.dataframe(df_lm_matched.head())

            # Download button for merged DataFrame
            csv = convert_df_to_csv(df_lm_matched)
            st.download_button(
                label="Download merged DataFrame as CSV",
                data=csv,
                file_name='merged_dataframe.csv',
                mime='text/csv',
            )