File size: 2,419 Bytes
49f942d bb0a66f 2afd8e2 bb0a66f 4666190 bb0a66f f125d78 bb0a66f 4666190 bb0a66f 4666190 83574a2 3454b90 1d9dd26 b55ed45 15d0a5e 3454b90 1d9dd26 75672f4 b55ed45 15d0a5e 3454b90 1d9dd26 f125d78 1d9dd26 9d51fb2 1d9dd26 9d51fb2 1d9dd26 d9e6d31 75672f4 b55ed45 15d0a5e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
from sklearn.model_selection import train_test_split
import streamlit as st
import pandas as pd
import pickle
import requests
import base64
# --- Sidebar: CSV upload --------------------------------------------------
# The header tells the user the CSV must contain an 'Abstract' column; that
# column is what the tagging step reads later in the script.
st.sidebar.header("Choose CSV File with 'Abstract' field")
# Streamlit discourages empty widget labels (label=""), so the uploader gets
# a real label that is hidden visually; the sidebar header above serves as
# the visible prompt.
uploaded_file = st.sidebar.file_uploader(
    "Upload CSV file",
    type=["csv"],
    label_visibility="collapsed",
)

# --- Page header and tabs -------------------------------------------------
st.title("About")
st.subheader("You can tag your input CSV file of theses and dissertations with Library Science, Archival Studies, and Information Science categories. The screen will show the output.")
# NOTE(review): the leading "π" in each tab label looks like a mis-decoded
# emoji; the labels are kept byte-for-byte until the intended characters are
# confirmed.
tab1, tab2, tab3 = st.tabs(["π Load Data", "π Tagged ETDs", "π Download Data"])

with tab1:
    # ===load data===
    # Show the uploaded CSV as-is. Nothing is rendered until a file is chosen.
    if uploaded_file is not None:
        df = pd.read_csv(uploaded_file, encoding='latin-1')
        # read_csv leaves the upload buffer positioned at end-of-file; rewind
        # it so any later read of the same upload does not see an empty file.
        uploaded_file.seek(0)
        st.dataframe(df)
# --- Model loading and prediction helpers ---------------------------------
# These are defined (and the models loaded) BEFORE the "Tagged ETDs" tab is
# rendered: the tab calls predict_category() as soon as a file is uploaded,
# so the name must already exist at that point in the script.


def _load_pickle(url):
    """Download *url* and return the object unpickled from the response body.

    SECURITY: unpickling can execute arbitrary code. Only point this at URLs
    whose content is fully trusted.

    Raises:
        requests.HTTPError: if the server answers with an error status, so an
            error page is never handed to ``pickle``.
        requests.Timeout: if the server does not respond within the timeout.
    """
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    # Unpickle straight from memory: no shared "temp.pkl" scratch file and no
    # file handle left open.
    return pickle.loads(response.content)


@st.cache_data(ttl=3600)
def read_model(url):
    """Return the classifier unpickled from *url*; cached for one hour."""
    return _load_pickle(url)


# Cached the same way as read_model so the download is not repeated on every
# Streamlit rerun of the script.
@st.cache_data(ttl=3600)
def read_tf(url):
    """Return the preprocessing object unpickled from *url*; cached for one hour."""
    return _load_pickle(url)


svm_classifier = read_model("https://github.com/manika-lamba/ml/raw/main/model2.pkl")
preprocessing = read_tf("https://github.com/manika-lamba/ml/raw/main/preprocessing.pkl")


def predict_category(abstract):
    """Return the classifier's prediction for a single abstract.

    The abstract is wrapped in a one-element list, passed through
    ``preprocessing.transform`` and the result is given to
    ``svm_classifier.predict``; that call's return value is returned
    unchanged.

    NOTE(review): presumably this is a length-1 array (scikit-learn
    convention), which would make each 'category' cell an array rather than
    a plain label -- confirm and index with [0] at the call site if a scalar
    is wanted.
    """
    # Preprocess the abstract
    abstract_preprocessed = preprocessing.transform([abstract])
    # Make prediction
    return svm_classifier.predict(abstract_preprocessed)


with tab2:
    # ===tagged ETDs===
    # Tag the "Abstract" column with the corresponding categories
    if uploaded_file is not None:
        # The upload buffer may already have been consumed by an earlier
        # read in this run; rewind so read_csv sees the whole file.
        uploaded_file.seek(0)
        df = pd.read_csv(uploaded_file, encoding='latin-1')
        st.dataframe(df)
        if 'Abstract' in df.columns:
            df['category'] = df['Abstract'].apply(predict_category)
            st.dataframe(df)
        else:
            # Without this check the lookup raises a bare KeyError traceback.
            st.error("The uploaded CSV file has no 'Abstract' column, so it cannot be tagged.")
with tab3:
    # ===download result===
    # The download controls live in the sidebar, as before.
    st.sidebar.header("Download Results")
    st.sidebar.text("Download the tagged results as a CSV file.")
    if uploaded_file is None:
        # `df` only exists once a CSV has been uploaded; without this guard a
        # click on the download control raised NameError.
        st.sidebar.info("Upload a CSV file to enable the download.")
    else:
        # st.download_button serves the file directly with the correct
        # "text/csv" MIME type, replacing the hand-built base64 data-URI link
        # that needed a second click and raw HTML rendering.
        st.sidebar.download_button(
            label="Download",
            data=df.to_csv(index=False).encode(),
            file_name="results.csv",
            mime="text/csv",
        )