|
import streamlit as st |
|
import pandas as pd |
|
import pickle |
|
import requests |
|
import base64 |
|
|
|
# NOTE(review): `st.cache` is deprecated in newer Streamlit releases in favour
# of `st.cache_resource`; it is kept so the required Streamlit version does
# not change.
@st.cache
def read_model(url):
    """Download a pickled classifier from *url* and return the loaded object.

    The payload is unpickled directly from the response bytes, so no
    temporary file is written and no file handle is left open.

    Args:
        url: HTTP(S) location of the pickle file.

    Returns:
        The object stored in the downloaded pickle.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: If the request fails or times out.
    """
    # The timeout bounds how long we wait on a silent server, not the total
    # download time.
    response = requests.get(url, timeout=30)
    # Fail loudly instead of trying to unpickle an HTML error page.
    response.raise_for_status()
    # SECURITY: unpickling can execute arbitrary code. Only point this at a
    # URL whose content is fully trusted.
    return pickle.loads(response.content)
|
|
|
|
|
# NOTE(review): this loader is not cached, so every call downloads the file
# again -- consider caching it if the script is re-executed often.
def read_tf(url):
    """Download a pickled preprocessing object from *url* and return it.

    The payload is unpickled directly from the response bytes, so no
    temporary file is written and no file handle is left open.

    Args:
        url: HTTP(S) location of the pickle file.

    Returns:
        The object stored in the downloaded pickle.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: If the request fails or times out.
    """
    # The timeout bounds how long we wait on a silent server, not the total
    # download time.
    response = requests.get(url, timeout=30)
    # Fail loudly instead of trying to unpickle an HTML error page.
    response.raise_for_status()
    # SECURITY: unpickling can execute arbitrary code. Only point this at a
    # URL whose content is fully trusted.
    return pickle.loads(response.content)
|
|
|
# Remote locations of the two pickled artifacts used for prediction.
MODEL_URL = "https://github.com/manika-lamba/ml/raw/main/model2.pkl"
PREPROCESSING_URL = "https://github.com/manika-lamba/ml/raw/main/preprocessing.pkl"

# Both objects are loaded at module level and read as globals by
# predict_category.
svm_classifier = read_model(MODEL_URL)
preprocessing = read_tf(PREPROCESSING_URL)
|
|
|
|
|
def predict_category(abstract):
    """Classify a single abstract.

    The text is wrapped in a one-element list, transformed by the
    module-level ``preprocessing`` object, and the result is handed to
    ``svm_classifier.predict``.

    Args:
        abstract: The abstract to classify.

    Returns:
        Whatever ``svm_classifier.predict`` returns for the one-item batch.
    """
    features = preprocessing.transform([abstract])
    return svm_classifier.predict(features)
|
|
|
|
|
|
|
|
|
# --- Sidebar: CSV upload -----------------------------------------------------
st.sidebar.header("Choose CSV File with 'Abstract' field")
uploaded_file = st.sidebar.file_uploader("", type=["csv"])

if uploaded_file is not None:
    df = pd.read_csv(uploaded_file, encoding='latin-1')
    # Show the raw upload first so the user can check what was parsed.
    st.dataframe(df)

    if 'Abstract' not in df.columns:
        # Without this guard a missing column surfaces as a raw KeyError
        # traceback in the app.
        st.error("The uploaded CSV file must contain an 'Abstract' column.")
    else:
        # NOTE(review): empty cells arrive here as NaN -- confirm that the
        # preprocessing object tolerates non-string input.
        # NOTE(review): each cell stores the raw return value of
        # predict_category; if that is a one-element array, the column holds
        # arrays rather than bare labels -- confirm this is intended.
        df['category'] = df['Abstract'].apply(predict_category)
        st.dataframe(df)

        # --- Sidebar: export -------------------------------------------------
        # Only offered once results exist, because the export needs `df`.
        st.sidebar.header("Download Results")
        st.sidebar.text("Download the tagged results as a CSV file.")

        if st.sidebar.button("Download"):
            csv_text = df.to_csv(index=False)
            b64 = base64.b64encode(csv_text.encode()).decode()
            # `text/csv` is the registered MIME type for CSV data.
            href = f'<a href="data:text/csv;base64,{b64}" download="results.csv">Download csv file</a>'
            st.markdown(href, unsafe_allow_html=True)

# --- Main page: about --------------------------------------------------------
st.title("About")
st.subheader("You can tag your input CSV file of theses and dissertations with Library Science, Archival Studies, and Information Science categories. The screen will show the output.")
|
|