File size: 3,248 Bytes
ed9dc6d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fadb089
 
 
ed9dc6d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fadb089
ed9dc6d
47660b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1536a4a
bcdcc8b
bf85a00
 
1536a4a
ed9dc6d
 
 
bf85a00
ed9dc6d
bf85a00
ed9dc6d
bf85a00
1536a4a
 
 
 
bf85a00
1536a4a
 
bf85a00
1536a4a
 
bf85a00
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import streamlit as st
import pandas as pd
import pickle
import requests
import base64

# === Page configuration ===
# Set the page title, (empty) icon and wide layout, then render the two
# headings shown above the file uploader.
st.set_page_config(page_title="ETDs Tagging", page_icon="", layout="wide")
st.header("Tagging Categories")
st.subheader("Put your file here...")

# ======== unique id ========
@st.cache_resource(ttl=3600)
def create_list():
    """Return the seed list ``[1, 2, 3]``.

    The function is wrapped in ``st.cache_resource`` with a TTL of 3600
    seconds. The module-level code that follows increments the first
    element of the returned list and derives ``uID`` from it.

    Returns:
        list[int]: A new list ``[1, 2, 3]`` each time the body actually runs.
    """
    return [1, 2, 3]

# Fetch the list from create_list() and bump its first element in place.
# NOTE(review): this presumably relies on st.cache_resource handing back the
# same list object on every rerun so the value keeps growing - confirm
# against the Streamlit caching documentation.
l = create_list()
first_list_value = l[0]
l[0] = first_list_value + 1
# String form of the bumped value; get_ext() prepends it to the file name.
uID = str(l[0])

@st.cache_data(ttl=3600)
def get_ext(uploaded_file):
    """Return the module-level ``uID`` string followed by the file's name.

    Args:
        uploaded_file: Object exposing a ``name`` attribute that can be
            concatenated to a string.

    Returns:
        ``uID + uploaded_file.name``.
    """
    return uID + uploaded_file.name

def read_tf(url, timeout=30):
    """Load a pickled object from an HTTP(S) URL or from a local file path.

    The only call in this script passes a bare file name
    (``"preprocessing.pkl"``), which ``requests.get`` rejects because it has
    no URL scheme, so anything that is not an ``http://``/``https://`` URL is
    opened as a local file instead.

    Args:
        url: An ``http://`` or ``https://`` URL, or a filesystem path to a
            pickle file.
        timeout: Seconds to wait for the HTTP response. Ignored for local
            paths.

    Returns:
        The object stored in the pickle.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        OSError: If a local path cannot be opened.
    """
    # NOTE(security): unpickling executes code embedded in the payload. Only
    # point this at artefacts you control, never at user-supplied locations.
    if str(url).lower().startswith(("http://", "https://")):
        response = requests.get(url, timeout=timeout)
        # Fail loudly on 4xx/5xx instead of trying to unpickle an error page.
        response.raise_for_status()
        # Unpickle straight from memory: no shared "temp.pkl" on disk that
        # concurrent sessions could overwrite, and no unclosed file handle.
        return pickle.loads(response.content)

    with open(url, "rb") as f:
        return pickle.load(f)
    
def read_model(path):
    """Load a pickled model from a local file.

    This name was called below without being defined anywhere in the file,
    which raised NameError as soon as the script started.

    Args:
        path: Filesystem path to the pickle file.

    Returns:
        The object stored in the pickle.
    """
    # NOTE(security): unpickling executes code embedded in the file; only
    # load model artefacts that ship with the application.
    with open(path, "rb") as f:
        return pickle.load(f)


# Classifier and text-preprocessing object used by predict_category().
svm_classifier = read_model("model2.pkl")
preprocessing = read_tf("preprocessing.pkl")

# Predict the category for a single abstract
def predict_category(abstract):
    """Run one abstract through the preprocessing step and the classifier.

    The abstract is wrapped in a one-item list, passed to
    ``preprocessing.transform`` and the transformed result is handed to
    ``svm_classifier.predict``.

    Args:
        abstract: The abstract to classify.

    Returns:
        Whatever ``svm_classifier.predict`` returns for the one-item batch;
        the caller in this script reads element ``[0]`` of it.
    """
    features = preprocessing.transform([abstract])
    return svm_classifier.predict(features)

# Create sidebar
#===upload file===
@st.cache_data(ttl=3600)
def upload(file):
    """Read a CSV file into a DataFrame.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        pandas.DataFrame: The parsed contents of ``file``.
    """
    # Read the argument itself. The previous body ignored ``file`` and read
    # the module-level ``uploaded_file``, which is only assigned further down
    # the script and need not be the object the caller passed in.
    return pd.read_csv(file)

@st.cache_data(ttl=3600)
def conv_txt(extype):
    """Parse the uploaded file as tab-separated text and rename its columns.

    Reads the module-level ``uploaded_file`` with a tab separator and a
    carriage-return line terminator, then renames columns using
    ``col_dict``.

    Args:
        extype: Not used inside the body. NOTE(review): presumably only
            serves as the cache key - confirm with the caller.

    Returns:
        pandas.DataFrame: The parsed table with renamed columns.

    NOTE(review): ``col_dict`` is not defined in the visible source, so a
    call raises NameError unless it is provided elsewhere.
    """
    raw = pd.read_csv(uploaded_file, sep='\t', lineterminator='\r')
    return raw.rename(columns=col_dict)


#===Read data===

# uploaded_file = st.file_uploader("Choose a file", type=['csv'])

# df = None  # Initialize df as None outside the if block

# if uploaded_file is not None:
#     df = pd.read_csv(uploaded_file, encoding='latin-1')
#     st.dataframe(df)
    
#     # Tag the "Abstract" column with the corresponding categories
#     df['category'] = df['Abstract'].apply(predict_category)
#     # df['category'] = df['category'].str[2:-2]
    
#     st.dataframe(df)

# st.sidebar.header("Download Results")
# st.sidebar.text("Download the tagged results as a CSV file.")

# if df is not None:
#     csv = df.to_csv(index=False)
#     b64 = base64.b64encode(csv.encode()).decode()
#     st.sidebar.download_button(
#         label="Download", 
#         data=b64,
#         file_name="results.csv",
#         mime="text/csv"
#     )
# else:
#     st.sidebar.text("Please upload a file first to enable the download button.")


# === Upload, tag and offer the result for download ===
uploaded_file = st.file_uploader("Choose a file", type=['csv'])
st.sidebar.header("Download Results")
st.sidebar.text("Download the tagged results as a CSV file.")

if uploaded_file is not None:
    df = pd.read_csv(uploaded_file, encoding='latin-1')
    st.dataframe(df)

    if 'Abstract' not in df.columns:
        # Without this guard the tagging step fails with a bare KeyError
        # traceback; tell the user what the file is missing instead.
        st.error('The uploaded file must contain an "Abstract" column.')
    else:
        # read_csv turns empty cells into NaN (a float). Hand the text
        # pipeline an empty string so it always receives str input.
        abstracts = df['Abstract'].fillna('').astype(str)

        # Tag the "Abstract" column with the corresponding categories.
        # predict_category returns a one-item batch, hence the [0].
        df['category'] = abstracts.apply(lambda x: predict_category(x)[0])
        st.dataframe(df)

        # download_button accepts raw bytes, so the CSV is passed directly;
        # the earlier base64 encode-then-decode round trip was a no-op.
        csv = df.to_csv(index=False).encode('utf-8')

        st.sidebar.download_button(
            label="Download CSV",
            data=csv,
            file_name="results.csv",
            mime="text/csv",
            key='download-csv'
        )