|
from sklearn.model_selection import train_test_split |
|
|
|
import streamlit as st |
|
import pandas as pd |
|
import pickle |
|
import requests |
|
import base64 |
|
|
|
@st.cache_data(ttl=3600)
def read_model(url):
    """Download a pickled classifier from ``url`` and return the loaded object.

    The decorator caches the result with ``ttl=3600``, so the download is
    only repeated on a cache miss.

    Args:
        url: HTTP(S) address of the pickle file.

    Returns:
        The object obtained by unpickling the response body.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection failure or timeout.
    """
    response = requests.get(url, timeout=60)
    # Fail loudly on an HTTP error instead of trying to unpickle an error page.
    response.raise_for_status()
    # SECURITY: unpickling can execute arbitrary code -- only point this at a
    # trusted URL.
    # Deserialize straight from memory: no shared "temp.pkl" scratch file that
    # concurrent sessions could overwrite, and no unclosed file handle.
    return pickle.loads(response.content)
|
|
|
|
|
# Cached like read_model: Streamlit re-executes the script on every
# interaction, so an uncached loader would download the file on each rerun.
@st.cache_data(ttl=3600)
def read_tf(url):
    """Download a pickled preprocessing object from ``url`` and return it.

    Args:
        url: HTTP(S) address of the pickle file.

    Returns:
        The object obtained by unpickling the response body.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection failure or timeout.
    """
    response = requests.get(url, timeout=60)
    # Fail loudly on an HTTP error instead of trying to unpickle an error page.
    response.raise_for_status()
    # SECURITY: unpickling can execute arbitrary code -- only point this at a
    # trusted URL.
    # Deserialize straight from memory: no shared "temp.pkl" scratch file and
    # no unclosed file handle.
    return pickle.loads(response.content)
|
|
|
# Remote locations of the two pickled artifacts the app depends on.
_MODEL_URL = "https://github.com/manika-lamba/ml/raw/main/model2.pkl"
_PREPROCESSING_URL = "https://github.com/manika-lamba/ml/raw/main/preprocessing.pkl"

# Loaded at module level; predict_category reads both names as globals.
svm_classifier = read_model(_MODEL_URL)
preprocessing = read_tf(_PREPROCESSING_URL)
|
|
|
|
|
def predict_category(abstract):
    """Run one abstract through the preprocessing step and the classifier.

    Uses the module-level ``preprocessing`` and ``svm_classifier`` objects.

    Args:
        abstract: The text to classify; it is wrapped in a one-element list
            before being handed to ``preprocessing.transform``.

    Returns:
        Whatever ``svm_classifier.predict`` returns for that one-row input
        (presumably an array-like holding a single label -- confirm against
        the pickled model).
    """
    features = preprocessing.transform([abstract])
    return svm_classifier.predict(features)
|
|
|
|
|
|
|
|
|
# --- Sidebar: choose the input file ----------------------------------------
st.sidebar.header("Choose CSV File with 'Abstract' field")
# Widgets need a non-empty label for accessibility; it is hidden so the
# sidebar looks the same as with the former empty label.
uploaded_file = st.sidebar.file_uploader(
    "CSV file",
    type=["csv"],
    label_visibility="collapsed",
)

if uploaded_file is not None:
    df = pd.read_csv(uploaded_file, encoding='latin-1')
    # Show the raw upload first.
    st.dataframe(df)

    if 'Abstract' not in df.columns:
        # Report the problem in the UI instead of crashing with a KeyError.
        st.error("The uploaded CSV file has no 'Abstract' column.")
    else:
        # predict_category hands back the model output for a one-row batch;
        # keep only its single element so each cell (and the exported CSV)
        # holds the label itself rather than a one-element array.
        df['category'] = df['Abstract'].apply(
            lambda abstract: predict_category(abstract)[0]
        )
        # Show the table again, now with the predicted category column.
        st.dataframe(df)

        # --- Sidebar: export the tagged table ---------------------------
        # Kept inside this branch because df only exists after an upload.
        st.sidebar.header("Download Results")
        st.sidebar.text("Download the tagged results as a CSV file.")
        # A native download button replaces the hand-built base64 data-URI
        # link (non-standard "file/csv" MIME type, two clicks to download).
        st.sidebar.download_button(
            "Download",
            data=df.to_csv(index=False).encode(),
            file_name="results.csv",
            mime="text/csv",
        )

st.title("About")
st.subheader("")
|
|
|
|
|
|
|
|
|
import streamlit as st |
|
from PIL import Image |
|
|
|
|
|
# NOTE(review): Streamlit documents set_page_config as a call that should come
# before other st.* commands. If this section really lives in the same script
# as code above it (rather than being a separate page file), move it to the top.
st.set_page_config(
    page_title="ETDs",
    layout="wide"
)
st.title('Tag ETDs')

# st.tabs yields one container per label: two labels, so two names.
# (Unpacking into three names raised ValueError.)
mt1, mt2 = st.tabs(["About", "Behind this app"])

with mt1:
    # Emoji restored from a mis-encoded character in the original text.
    st.header("👋 Hello!")
    st.write('You can tag your input CSV file of theses and dissertations with Library Science, Archival Studies, and Information Science categories. The screen will show the output.')
    # Empty text elements are used as vertical spacing before the divider.
    st.text('')
    st.text('')
    st.text('')
    st.text('')
    st.divider()
    # icon must be a single valid emoji; the previous mojibake value was not.
    st.error("This app works on Scopus's CSV file, Web of Science's Tab delimited file, and custom CSV file.", icon="🚨")

with mt2:
    st.header('Behind this app')
    st.subheader('Dr. Manika Lamba')
    st.text('Postdoctoral Research Associate, UIUC')
    st.text('')
    st.text('')