import streamlit as st
import pandas as pd
import pickle
import requests

#===config===
st.set_page_config(
    page_title="ETDs Tagging",
    page_icon="",
    layout="wide"
)
st.header("Tagging Categories")
st.subheader('Put your file here...')

#========unique id========
@st.cache_resource(ttl=3600)
def create_list():
    # st.cache_resource returns the *same* list object to every session,
    # so mutating it below acts as a simple app-wide counter.
    l = [1, 2, 3]
    return l

l = create_list()
first_list_value = l[0]
l[0] = first_list_value + 1  # increment the shared counter
uID = str(l[0])
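
# Note: the counter resets when the 1-hour TTL expires, so uID is only
# unique within that window. A collision-free alternative (an assumption,
# not part of the original design) would be uuid.uuid4():
#
#     import uuid
#     uID = str(uuid.uuid4())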

@st.cache_data(ttl=3600)
def get_ext(uploaded_file):
    # Prefix the file name with the session counter so different sessions
    # and different uploads get distinct cache keys.
    extype = uID + uploaded_file.name
    return extype


@st.cache_resource(ttl=3600)
def read_model(url):
    # Download the pickled classifier and load it from a temporary file.
    response = requests.get(url)
    with open("temp.pkl", "wb") as f:
        f.write(response.content)
    with open("temp.pkl", "rb") as f:
        svm_classifier = pickle.load(f)
    return svm_classifier


@st.cache_resource(ttl=3600)
def read_tf(url):
    # Same pattern for the pickled preprocessing (vectorizer) object.
    response = requests.get(url)
    with open("temp.pkl", "wb") as f:
        f.write(response.content)
    with open("temp.pkl", "rb") as f:
        preprocessing = pickle.load(f)
    return preprocessing

svm_classifier = read_model("https://github.com/manika-lamba/ml/raw/main/category/model2.pkl")
preprocessing = read_tf("https://github.com/manika-lamba/ml/raw/main/category/preprocessing.pkl")
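# The two pickles are assumed (from how they are used below) to be a fitted
# scikit-learn-style vectorizer and classifier; unpickling requires library
# versions compatible with those used when the files were created.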

# Function to predict the category for a given abstract
def predict_category(abstract):
    # Preprocess the abstract
    abstract_preprocessed = preprocessing.transform([abstract])
    # Make prediction
    prediction = svm_classifier.predict(abstract_preprocessed)
    return prediction
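# Example call (hypothetical abstract text):
#
#     label = predict_category("This thesis examines metadata quality "
#                              "in electronic theses and dissertations.")[0]
#
# predict() returns an array of labels, hence the [0] indexing used below.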

# Create sidebar
#===upload file===
@st.cache_data(ttl=3600)
def upload(extype):
    # extype serves only as the cache key; the data itself comes from the
    # uploaded_file global defined below.
    papers = pd.read_csv(uploaded_file)
    return papers

@st.cache_data(ttl=3600)
def conv_txt(extype):
    papers = pd.read_csv(uploaded_file, sep='\t', lineterminator='\r')
    # col_dict (a column-rename mapping) is not defined in this script and
    # must be supplied before conv_txt is called.
    papers.rename(columns=col_dict, inplace=True)
    return papers
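# For illustration only, a hypothetical col_dict (the real mapping is not
# defined in this script):
#
#     col_dict = {'TI': 'Title', 'AB': 'Abstract'}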

# Read Data
uploaded_file = st.file_uploader("Choose a file", type=['csv'])
st.sidebar.header("Download Results")
st.sidebar.text("Download the tagged results as a CSV file.")

if uploaded_file is not None:
    df = pd.read_csv(uploaded_file, encoding='latin-1')
    st.dataframe(df)
    
    # Tag the "Abstract" column with the corresponding categories
    df['category'] = df['Abstract'].apply(lambda x: predict_category(x)[0])
    st.dataframe(df)
    
    # Convert the tagged DataFrame to CSV bytes for download
    csv = df.to_csv(index=False).encode('utf-8')

    st.sidebar.download_button(
        label="Download CSV",
        data=csv,  # download_button accepts bytes directly; no Base64 round trip needed
        file_name="results.csv",
        mime="text/csv",
        key='download-csv'
    )
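
# To run the app locally (assuming this file is saved as app.py):
#
#     streamlit run app.py
#
# Upload a CSV with an "Abstract" column; the predicted category is added as
# a new "category" column and offered as a download in the sidebar.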