File size: 2,419 Bytes
49f942d
 
bb0a66f
 
2afd8e2
bb0a66f
 
 
4666190
bb0a66f
 
 
 
 
 
 
f125d78
bb0a66f
4666190
bb0a66f
 
4666190
 
83574a2
3454b90
1d9dd26
 
b55ed45
15d0a5e
 
3454b90
1d9dd26
 
 
75672f4
b55ed45
15d0a5e
 
 
 
3454b90
1d9dd26
f125d78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d9dd26
 
9d51fb2
1d9dd26
9d51fb2
 
1d9dd26
 
 
 
d9e6d31
 
75672f4
b55ed45
15d0a5e
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
from sklearn.model_selection import train_test_split

import streamlit as st
import pandas as pd
import pickle
import requests
import base64



# Sidebar: upload control for the input CSV.
st.sidebar.header("Choose CSV File with 'Abstract' field")
# Streamlit discourages empty widget labels (accessibility), so give the
# uploader a real label and collapse it to keep the layout the same.
uploaded_file = st.sidebar.file_uploader(
    "Upload CSV file",
    type=["csv"],
    label_visibility="collapsed",
)


# Main page heading and description.
st.title("About")
st.subheader("You can tag your input CSV file of theses and dissertations with Library Science, Archival Studies, and Information Science categories. The screen will show the output.")

# Tab labels: the emoji were stored as mojibake (UTF-8 bytes decoded with the
# wrong codec); restored to the intended characters.
tab1, tab2, tab3 = st.tabs(["📈 Load Data", "📃 Tagged ETDs", "📓 Download Data"])

with tab1:
    # ===load data===
    # Show the uploaded CSV as-is. `df` is a module-level name that later
    # sections of the script read, so it keeps its name.
    if uploaded_file is not None:
        df = pd.read_csv(uploaded_file, encoding='latin-1')
        st.dataframe(df)
    
# ===model loading and tagged ETDs===
# The loaders and predict_category are defined *before* the tab body that
# uses them: Streamlit executes the script top to bottom, so calling
# predict_category ahead of its `def` raised NameError on upload.


def _load_pickle_from_url(url):
    """Download *url* and return the object unpickled from the response body.

    Raises:
        requests.HTTPError: if the server answers with an error status, so an
            HTML error page is never handed to the unpickler.
    """
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    # SECURITY: unpickling executes arbitrary code embedded in the payload.
    # Only point this at URLs whose content you control and trust.
    # Deserialize straight from memory; the previous shared "temp.pkl" file
    # was written through an unclosed handle and could be clobbered by a
    # concurrent session.
    return pickle.loads(response.content)


# st.cache_resource is Streamlit's cache for shared objects such as ML
# models; the object is downloaded once and reused for up to an hour.
@st.cache_resource(ttl=3600)
def read_model(url):
    """Return the pickled classifier stored at *url* (cached for one hour)."""
    return _load_pickle_from_url(url)


# Cached like read_model so the preprocessing object is not re-downloaded on
# every script rerun.
@st.cache_resource(ttl=3600)
def read_tf(url):
    """Return the pickled preprocessing object stored at *url* (cached for one hour)."""
    return _load_pickle_from_url(url)


svm_classifier = read_model("https://github.com/manika-lamba/ml/raw/main/model2.pkl")
preprocessing = read_tf("https://github.com/manika-lamba/ml/raw/main/preprocessing.pkl")


def predict_category(abstract):
    """Predict the category for a single abstract.

    The abstract is wrapped in a one-item list, passed through
    ``preprocessing.transform`` and then through ``svm_classifier.predict``.

    Returns:
        Whatever ``svm_classifier.predict`` returns for that one-item batch.
        NOTE(review): this is presumably a length-1 array rather than a bare
        label - confirm before relying on the cell format of the output.
    """
    # Preprocess the abstract
    abstract_preprocessed = preprocessing.transform([abstract])
    # Make prediction
    prediction = svm_classifier.predict(abstract_preprocessed)
    return prediction


with tab2:
    # Tag the "Abstract" column with the corresponding categories
    if uploaded_file is not None:
        # The upload is a file-like stream that an earlier read_csv call has
        # already consumed; rewind it or pandas sees an empty file.
        uploaded_file.seek(0)
        df = pd.read_csv(uploaded_file, encoding='latin-1')
        st.dataframe(df)
        if 'Abstract' not in df.columns:
            # Report the problem in the UI instead of crashing with KeyError.
            st.error("The uploaded CSV file has no 'Abstract' column.")
        else:
            df['category'] = df['Abstract'].apply(predict_category)
            st.dataframe(df)

with tab3:
    # ===download result===
    # The download controls are rendered in the sidebar.
    st.sidebar.header("Download Results")
    st.sidebar.text("Download the tagged results as a CSV file.")

    # `df` only exists once a file has been uploaded; without this guard a
    # click on "Download" before uploading raised NameError.
    has_data = uploaded_file is not None
    csv_bytes = df.to_csv(index=False).encode() if has_data else b""

    # Native download widget instead of a hand-built base64 data-URI link:
    # no raw HTML injection, no data-URI size limits, and a valid CSV MIME
    # type ("file/csv" is not a registered type).
    st.sidebar.download_button(
        "Download",
        data=csv_bytes,
        file_name="results.csv",
        mime="text/csv",
        disabled=not has_data,
    )