File size: 2,291 Bytes
25f2580
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import streamlit as st
import pandas as pd
import linktransformer as lt

# Function to convert DataFrame to CSV for download
def convert_df_to_csv(df):
    """Serialize ``df`` to CSV and return the result as UTF-8 encoded bytes.

    The CSV text comes from ``df.to_csv()`` with its default arguments, so
    the DataFrame index is written as the first column.
    """
    csv_text = df.to_csv()
    return csv_text.encode("utf-8")

# Page heading rendered at the top of the app.
st.title("DataFrame Merger using LinkTransformer")

# Function to load DataFrame
def load_dataframe(upload, path):
    """Load a CSV into a DataFrame from an uploaded file or a filesystem path.

    Args:
        upload: File-like object to read (for example the value returned by
            ``st.file_uploader``), or ``None`` when nothing was uploaded.
            Takes precedence over ``path``.
        path: Location of a CSV file, consulted only when ``upload`` is
            ``None``. Surrounding whitespace is ignored; ``None``, empty, and
            whitespace-only values all count as "not provided".

    Returns:
        The parsed ``pandas.DataFrame``, or ``None`` when neither source was
        provided.
    """
    if upload is not None:
        return pd.read_csv(upload)

    # Text typed or pasted into an input box often carries stray whitespace;
    # normalise it so a blank-looking value is not handed to read_csv as if
    # it were a real path.
    cleaned_path = path.strip() if isinstance(path, str) else path
    if not cleaned_path:
        return None
    return pd.read_csv(cleaned_path)

# Options for DataFrame 1: either upload a CSV or give a path to one.
# load_dataframe prefers the upload when both are supplied.
df1_upload = st.file_uploader("Upload DataFrame 1 (CSV)", type=['csv'], key='df1_upload')
df1_path = st.text_input("...or enter path for DataFrame 1 (CSV)", key='df1_path')

# Options for DataFrame 2 (same two choices as DataFrame 1).
df2_upload = st.file_uploader("Upload DataFrame 2 (CSV)", type=['csv'], key='df2_upload')
df2_path = st.text_input("...or enter path for DataFrame 2 (CSV)", key='df2_path')

# Load and display the DataFrames. Either one is None until the user has
# supplied an upload or a path for it.
df1 = load_dataframe(df1_upload, df1_path)
df2 = load_dataframe(df2_upload, df2_path)

if df1 is not None:
    st.write("DataFrame 1 Preview:")
    st.dataframe(df1.head())

if df2 is not None:
    st.write("DataFrame 2 Preview:")
    st.dataframe(df2.head())


# Model selection
model_path = st.text_input("Model path (HuggingFace or local)", value="all-MiniLM-L6-v2")

# Column selection and merge.
# Both frames must be checked for None before `.empty` is touched: on the
# first run nothing has been loaded yet, and `None.empty` raises
# AttributeError.
if df1 is not None and df2 is not None and not df1.empty and not df2.empty:
    columns_df1 = df1.columns.tolist()
    columns_df2 = df2.columns.tolist()

    # Pre-select the first column of each frame; a one-element list keeps the
    # default unambiguous whatever the column label's type is.
    selected_columns_df1 = st.multiselect("Select columns from DataFrame 1 to match on:", columns_df1, default=columns_df1[:1])
    selected_columns_df2 = st.multiselect("Select columns from DataFrame 2 to match on:", columns_df2, default=columns_df2[:1])

    # Perform merge
    if st.button("Merge DataFrames"):
        if not selected_columns_df1 or not selected_columns_df2:
            # The user cleared a selection; there is nothing to match on.
            st.warning("Select at least one column from each DataFrame to match on.")
        else:
            model = lt.LinkTransformer(model_path)
            # df2 is the left frame and df1 the right one, so the key columns
            # must be paired accordingly: left_on takes the DataFrame 2
            # selection and right_on the DataFrame 1 selection.
            df_lm_matched = lt.merge(
                df2,
                df1,
                merge_type='1:m',
                on=None,
                model=model,
                left_on=selected_columns_df2,
                right_on=selected_columns_df1,
            )
            st.write("Merged DataFrame Preview:")
            st.dataframe(df_lm_matched.head())

            # Download button for merged DataFrame.
            # NOTE(review): this sits inside the `st.button` branch, so it is
            # only drawn on the run triggered by the merge click — confirm
            # that is acceptable or persist the result in session state.
            csv = convert_df_to_csv(df_lm_matched)
            st.download_button(
                label="Download merged DataFrame as CSV",
                data=csv,
                file_name='merged_dataframe.csv',
                mime='text/csv',
            )