File size: 2,990 Bytes
25f2580
 
 
 
 
 
 
 
3ad4fe0
25f2580
 
d1c30b8
 
25f2580
d1c30b8
 
 
 
25f2580
d1c30b8
25f2580
 
 
 
 
 
 
d1c30b8
 
25f2580
 
 
 
 
 
 
 
 
 
 
 
3ad4fe0
25f2580
a290a0d
3ad4fe0
 
 
 
 
 
 
 
 
 
 
 
25f2580
3ad4fe0
 
 
0812987
3ad4fe0
 
 
 
 
 
 
 
d1c30b8
 
3ad4fe0
 
0812987
3ad4fe0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import streamlit as st
import pandas as pd
import linktransformer as lt

# Function to convert DataFrame to CSV for download
def convert_df_to_csv(df):
    """Render *df* as CSV text and return that text encoded as UTF-8 bytes.

    ``to_csv`` is called with its defaults, so the index is written as the
    first column of the output.
    """
    csv_text = df.to_csv()
    return csv_text.encode('utf-8')

# Page title of the Streamlit app
st.title('Merge Dataframes using LinkTransformer')

# Function to load DataFrame
def load_dataframe(upload):
    """Read an uploaded file into a DataFrame.

    Args:
        upload: File-like object exposing a ``name`` attribute (for example
            the value returned by ``st.file_uploader``), or ``None`` when
            nothing has been uploaded.

    Returns:
        The parsed DataFrame. An empty DataFrame is returned when ``upload``
        is ``None`` or when a CSV upload contains no data at all.
    """
    if upload is None:
        return pd.DataFrame()

    # Compare case-insensitively and include the dot so that "DATA.CSV" is
    # parsed as CSV instead of falling through to the Excel reader.
    if upload.name.lower().endswith('.csv'):
        try:
            return pd.read_csv(upload)
        except pd.errors.EmptyDataError:
            # A zero-byte CSV has no header to parse; report it as an empty
            # frame so callers can handle it with their emptiness check.
            return pd.DataFrame()

    # Anything that is not a CSV is handed to the Excel reader.
    return pd.read_excel(upload)
# Options for DataFrame 1
df1_upload = st.file_uploader("Upload DataFrame 1 (CSV)", type=['csv'], key='df1_upload')

# Options for DataFrame 2
df2_upload = st.file_uploader("Upload DataFrame 2 (CSV)", type=['csv'], key='df2_upload')

# Load the DataFrames; an empty DataFrame stands in for a missing upload
df1 = load_dataframe(df1_upload)
df2 = load_dataframe(df2_upload)

# Preview only what was actually uploaded. load_dataframe never returns None,
# so the upload handle (not the frame) tells us whether there is anything to
# show; testing the frame against None would always render an empty preview.
if df1_upload is not None:
    st.write("DataFrame 1 Preview:")
    st.dataframe(df1.head())

if df2_upload is not None:
    st.write("DataFrame 2 Preview:")
    st.dataframe(df2.head())


# Model selection: free-text model name or path, pre-filled with "all-MiniLM-L6-v2"
model_path = st.text_input("Model path (HuggingFace or local)", value="all-MiniLM-L6-v2")
# More on model selection is available at https://linktransformer.github.io/

if df1_upload is not None and df2_upload is not None:
    if not df1.empty and not df2.empty:
        # Let the user pick the match columns; the first column of each frame
        # is pre-selected.
        columns_df1 = df1.columns.tolist()
        columns_df2 = df2.columns.tolist()
        selected_columns_df1 = st.multiselect("Select columns from DataFrame 1 to match on:", columns_df1, default=[columns_df1[0]])
        selected_columns_df2 = st.multiselect("Select columns from DataFrame 2 to match on:", columns_df2, default=[columns_df2[0]])

        # Perform merge
        if st.button("Merge DataFrames"):
            if not selected_columns_df1 or not selected_columns_df2:
                # The user can clear a multiselect; merging on no columns is
                # meaningless, so ask for a selection instead of failing.
                st.warning("Please select at least one column from each DataFrame to match on.")
            else:
                model = lt.LinkTransformer(model_path)
                # df2 is passed as the left frame and df1 as the right one, so
                # left_on must name df2's columns and right_on df1's columns.
                # Passing them the other way round fails whenever the two
                # files use different column names.
                df_lm_matched = lt.merge(
                    df2,
                    df1,
                    merge_type='1:m',
                    on=None,
                    model=model,
                    left_on=selected_columns_df2,
                    right_on=selected_columns_df1,
                )
                st.write("Merged DataFrame Preview:")
                st.dataframe(df_lm_matched.head())

                # Download button for merged DataFrame
                csv = convert_df_to_csv(df_lm_matched)
                st.download_button(
                    label="Download as CSV",
                    data=csv,
                    file_name='merged_dataframe.csv',
                    mime='text/csv',
                )
    else:
        st.write("It appears that your dataframes are empty. Please upload valid dataframes.")
else:
    st.write("Please upload or enter paths for both DataFrames.")
            
# Footer: link to the project website and the paper to cite
st.write("For more information and advanced usage, please visit the [LinkTransformer website](https://linktransformer.github.io/).")
st.write("If you use LinkTransformer in your research, please cite the following paper: [LinkTransformer: A Unified Package for Record Linkage with Transformer Language Models](https://arxiv.org/abs/2309.00789)")