File size: 2,722 Bytes
49f942d
 
bb0a66f
 
2afd8e2
bb0a66f
 
 
196a9c0
bb0a66f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
333facd
 
 
bb0a66f
 
 
 
 
 
 
 
 
196a9c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
from sklearn.model_selection import train_test_split

import streamlit as st
import pandas as pd
import pickle
import requests
import base64

@st.cache_data(ttl=3600)
def read_model(url):
    """Download and unpickle the trained classifier stored at ``url``.

    The result is cached by Streamlit for one hour (``ttl=3600``), so the
    download is repeated at most once per hour for a given ``url``.

    Args:
        url: HTTP(S) address of the pickled model file.

    Returns:
        The object obtained by unpickling the response body.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection failure or timeout.
    """
    # A timeout keeps a stalled connection from hanging the app indefinitely.
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of trying to unpickle an error page.
    response.raise_for_status()
    # SECURITY: unpickling can execute arbitrary code; only point this at a
    # URL whose content is fully trusted.
    # Deserialize straight from memory: no leaked file handle and no shared
    # "temp.pkl" that concurrent sessions could overwrite mid-read.
    return pickle.loads(response.content)


@st.cache_data(ttl=3600)
def read_tf(url):
    """Download and unpickle the preprocessing object stored at ``url``.

    Cached by Streamlit for one hour (``ttl=3600``), matching ``read_model``,
    so script reruns do not trigger a fresh download each time.

    Args:
        url: HTTP(S) address of the pickled preprocessing file.

    Returns:
        The object obtained by unpickling the response body.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection failure or timeout.
    """
    # A timeout keeps a stalled connection from hanging the app indefinitely.
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of trying to unpickle an error page.
    response.raise_for_status()
    # SECURITY: unpickling can execute arbitrary code; only point this at a
    # URL whose content is fully trusted.
    # Deserialize straight from memory: no leaked file handle and no shared
    # "temp.pkl" that concurrent sessions could overwrite mid-read.
    return pickle.loads(response.content)

# Download locations of the pickled classifier and its preprocessing object.
_MODEL_URL = "https://github.com/manika-lamba/ml/raw/main/model2.pkl"
_PREPROCESSING_URL = "https://github.com/manika-lamba/ml/raw/main/preprocessing.pkl"

# Module-level objects consumed by predict_category.
svm_classifier = read_model(_MODEL_URL)
preprocessing = read_tf(_PREPROCESSING_URL)

# Function to predict the category for a given abstract
def predict_category(abstract):
    """Classify one abstract with the module-level model.

    Args:
        abstract: The abstract to classify (presumably a text string;
            confirm against the caller).

    Returns:
        The value produced by ``svm_classifier.predict`` for the one-item
        batch; it is returned as-is, not unwrapped to a single label.
    """
    # The preprocessing object is handed a one-element list holding the abstract.
    features = preprocessing.transform([abstract])
    return svm_classifier.predict(features)

# Create sidebar

# Create tab for choosing CSV file
st.sidebar.header("Choose CSV File with 'Abstract' field")
uploaded_file = st.sidebar.file_uploader("", type=["csv"])

# Holds the tagged table; stays None until a usable file has been processed,
# so the download branch below can tell whether there is anything to export.
df = None

if uploaded_file is not None:
    uploaded_df = pd.read_csv(uploaded_file, encoding='latin-1')
    st.dataframe(uploaded_df)
    if 'Abstract' in uploaded_df.columns:
        # Tag the "Abstract" column with the corresponding categories
        uploaded_df['category'] = uploaded_df['Abstract'].apply(predict_category)
        st.dataframe(uploaded_df)
        df = uploaded_df
    else:
        # Report the problem in the UI instead of crashing with a KeyError.
        st.error("The uploaded CSV file has no 'Abstract' column.")

st.sidebar.header("Download Results")
st.sidebar.text("Download the tagged results as a CSV file.")

# Create a download button
if st.sidebar.button("Download"):
    if df is None:
        # Previously this path raised NameError because df was never defined.
        st.sidebar.warning("Upload a CSV file with an 'Abstract' column first.")
    else:
        csv = df.to_csv(index=False)
        # Embed the CSV in a data URI so the browser can save it directly.
        b64 = base64.b64encode(csv.encode()).decode()
        href = f'<a href="data:file/csv;base64,{b64}" download="results.csv">Download csv file</a>'
        st.markdown(href, unsafe_allow_html=True)

# Page heading followed by an empty subheader.
st.title("About")
st.subheader("")



#import module
# streamlit is imported again here under the same alias; Image is not used in
# the visible part of this file.
import streamlit as st
from PIL import Image

#===config===
# NOTE(review): Streamlit's documentation has required set_page_config to be
# the first Streamlit command executed on a page. Here it runs after earlier
# st.* calls in this script, which may raise on such versions — confirm against
# the installed Streamlit release and consider moving this call to the top.
st.set_page_config(
     page_title="ETDs",
     layout="wide"
)
# Second page title, rendered after the "About" title above.
st.title('Tag ETDs')

#===page===
# st.tabs returns one container per label; two labels are given, so exactly
# two names are unpacked (three names raised ValueError at startup).
mt1, mt2 = st.tabs(["About", "Behind this app"])

# "About" tab: short description of the app and the supported input files.
with mt1:
    st.header("🌌 Hello!")
    st.write('You can tag your input CSV file of theses and dissertations with Library Science, Archival Studies, and Information Science categories. The screen will show the output.')
    # Empty text elements act as vertical spacing before the divider.
    st.text('')
    st.text('')
    st.text('')
    st.text('')
    st.divider()
    st.error("This app works on Scopus's CSV file, Web of Science's Tab delimited file, and custom CSV file.", icon="🚨")

# "Behind this app" tab: author credit.
with mt2:
    st.header('Behind this app')
    st.subheader('Dr. Manika Lamba')
    st.text('Postdoctoral Research Associate, UIUC')
    st.text('')
    st.text('')