Spaces:

manika07
/

etds

Runtime error

App Files Files Community

manika07 committed on Jun 16, 2023

Commit

bb0a66f

1 Parent(s): 02b57f8

first commit

Browse files

Files changed (4) hide show

model2.pkl +3 -0
preprocessing.pkl +3 -0
requirements.txt +5 -0
streamlit_app.py +58 -0

model2.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5fd47b1a0097fe8a96af6c32871937954156feda2b4f9c797b23da1a4b3c696a
+size 444751

preprocessing.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:545b23fd00081f8a659fbb92e508eb672d69ba4e0f8436c27286c026ad7574f0
+size 553156

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+# Third-party packages only. pickle and base64 ship with Python's standard
+# library and are not installable distributions, so they are not listed here.
+streamlit
+pandas
+requests
+# NOTE(review): the pickled model/preprocessing objects presumably require
+# scikit-learn at load time - confirm and add it here if so.

streamlit_app.py ADDED Viewed

	@@ -0,0 +1,58 @@

+import streamlit as st
+import pandas as pd
+import pickle
+import requests
+import base64
+@st.cache
+def read_model(url):
+    response = requests.get(url)
+    open("temp.pkl", "wb").write(response.content)
+    with open("temp.pkl", "rb") as f:
+        svm_classifier = pickle.load(f)
+    return svm_classifier
+def read_tf(url):
+    response = requests.get(url)
+    open("temp.pkl", "wb").write(response.content)
+    with open("temp.pkl", "rb") as f:
+        preprocessing = pickle.load(f)
+    return preprocessing
+svm_classifier = read_model("https://github.com/manika-lamba/ml/raw/main/model2.pkl")
+preprocessing = read_tf("https://github.com/manika-lamba/ml/raw/main/preprocessing.pkl")
+# Function to predict the category for a given abstract
+def predict_category(abstract):
+    # Preprocess the abstract
+    abstract_preprocessed = preprocessing.transform([abstract])
+    # Make prediction
+    prediction = svm_classifier.predict(abstract_preprocessed)
+    return prediction
+# Create sidebar
+# Create tab for choosing CSV file
+st.sidebar.header("Choose CSV File with 'Abstract' field")
+uploaded_file = st.sidebar.file_uploader("", type=["csv"])
+if uploaded_file is not None:
+    df = pd.read_csv(uploaded_file, encoding='latin-1')
+    st.dataframe(df)
+    # Tag the "Abstract" column with the corresponding categories
+    df['category'] = df['Abstract'].apply(predict_category)
+    st.dataframe(df)
+st.sidebar.header("Download Results")
+st.sidebar.text("Download the tagged results as a CSV file.")
+# Create a download button
+if st.sidebar.button("Download"):
+    csv = df.to_csv(index=False)
+    b64 = base64.b64encode(csv.encode()).decode()
+    href = f'<a href="data:file/csv;base64,{b64}" download="results.csv">Download csv file</a>'
+    st.markdown(href, unsafe_allow_html=True)
+st.title("About")
+st.subheader("You can tag your input CSV file of theses and dissertations with Library Science, Archival Studies, and Information Science categories. The screen will show the output.")