Update app.py
Browse files
app.py
CHANGED
@@ -1,46 +1,10 @@
|
|
1 |
-
from sklearn.model_selection import train_test_split
|
2 |
-
|
3 |
import streamlit as st
|
4 |
import pandas as pd
|
5 |
import pickle
|
6 |
import requests
|
7 |
import base64
|
8 |
|
9 |
-
|
10 |
-
|
11 |
-
# Create sidebar
|
12 |
-
|
13 |
-
# Create tab for choosing CSV file
|
14 |
-
st.sidebar.header("Choose CSV File with 'Abstract' field")
|
15 |
-
uploaded_file = st.sidebar.file_uploader("", type=["csv"])
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
st.title("About")
|
22 |
-
st.subheader("You can tag your input CSV file of theses and dissertations with Library Science, Archival Studies, and Information Science categories. The screen will show the output.")
|
23 |
-
|
24 |
-
tab1, tab2, tab3 = st.tabs(["π Load Data", "π Tagged ETDs", "π Download Data"])
|
25 |
-
|
26 |
-
with tab1:
|
27 |
-
#===load data===
|
28 |
-
if uploaded_file is not None:
|
29 |
-
df = pd.read_csv(uploaded_file, encoding='latin-1')
|
30 |
-
st.dataframe(df)
|
31 |
-
|
32 |
-
with tab2:
|
33 |
-
#===tagged ETDs===
|
34 |
-
# Tag the "Abstract" column with the corresponding categories
|
35 |
-
|
36 |
-
if uploaded_file is not None:
|
37 |
-
df = pd.read_csv(uploaded_file, encoding='latin-1')
|
38 |
-
st.dataframe(df)
|
39 |
-
df['category'] = df['Abstract'].apply(predict_category)
|
40 |
-
st.dataframe(df)
|
41 |
-
|
42 |
-
# Function to predict the category for a given abstract
|
43 |
-
@st.cache_data(ttl=3600)
|
44 |
def read_model(url):
|
45 |
response = requests.get(url)
|
46 |
open("temp.pkl", "wb").write(response.content)
|
@@ -59,21 +23,36 @@ def read_tf(url):
|
|
59 |
svm_classifier = read_model("https://github.com/manika-lamba/ml/raw/main/model2.pkl")
|
60 |
preprocessing = read_tf("https://github.com/manika-lamba/ml/raw/main/preprocessing.pkl")
|
61 |
|
|
|
62 |
def predict_category(abstract):
|
63 |
-
# Preprocess the abstract
|
64 |
abstract_preprocessed = preprocessing.transform([abstract])
|
65 |
-
# Make prediction
|
66 |
prediction = svm_classifier.predict(abstract_preprocessed)
|
67 |
return prediction
|
68 |
|
69 |
-
|
70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
# Create a download button
|
72 |
-
|
73 |
-
|
|
|
|
|
|
|
74 |
|
75 |
-
|
76 |
-
|
77 |
-
b64 = base64.b64encode(csv.encode()).decode()
|
78 |
-
href = f'<a href="data:file/csv;base64,{b64}" download="results.csv">Download csv file</a>'
|
79 |
-
st.markdown(href, unsafe_allow_html=True)
|
|
|
|
|
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
3 |
import pickle
|
4 |
import requests
|
5 |
import base64
|
6 |
|
7 |
+
@st.cache
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
def read_model(url):
|
9 |
response = requests.get(url)
|
10 |
open("temp.pkl", "wb").write(response.content)
|
|
|
23 |
svm_classifier = read_model("https://github.com/manika-lamba/ml/raw/main/model2.pkl")
|
24 |
preprocessing = read_tf("https://github.com/manika-lamba/ml/raw/main/preprocessing.pkl")
|
25 |
|
26 |
+
# Function to predict the category for a given abstract
|
27 |
def predict_category(abstract):
    """Predict the category for a single abstract.

    The abstract is wrapped in a one-element list, run through the
    module-level ``preprocessing`` transformer, and the transformed result is
    handed to the module-level ``svm_classifier``.

    Args:
        abstract: The abstract to classify.

    Returns:
        Whatever ``svm_classifier.predict`` returns for the one-item batch,
        passed through unchanged.
    """
    # NOTE(review): if the classifier follows the scikit-learn convention the
    # result is a length-1 array rather than a bare label -- confirm that this
    # is what callers expect to store per row.
    features = preprocessing.transform([abstract])
    return svm_classifier.predict(features)
|
33 |
|
34 |
+
# Create sidebar

# Create tab for choosing CSV file
st.sidebar.header("Choose CSV File with 'Abstract' field")
uploaded_file = st.sidebar.file_uploader("", type=["csv"])

# ``df`` holds the tagged table. It is bound up front so the download section
# below can tell "nothing to download yet" apart from a real result instead of
# failing with a NameError when no file has been uploaded.
df = None

if uploaded_file is not None:
    df = pd.read_csv(uploaded_file, encoding='latin-1')
    st.dataframe(df)
    if 'Abstract' in df.columns:
        # Tag the "Abstract" column with the corresponding categories
        df['category'] = df['Abstract'].apply(predict_category)
        st.dataframe(df)
    else:
        # Without the column there is nothing to tag; report it rather than
        # letting the column lookup raise KeyError.
        st.error("The uploaded CSV file has no 'Abstract' column, so it cannot be tagged.")
        df = None

st.sidebar.header("Download Results")
st.sidebar.text("Download the tagged results as a CSV file.")

# Create a download button
if st.sidebar.button("Download"):
    if df is None:
        st.sidebar.warning("Upload a CSV file with an 'Abstract' column before downloading.")
    else:
        # Embed the CSV in a base64 data URI so the browser can save it
        # straight from the rendered link.
        csv = df.to_csv(index=False)
        b64 = base64.b64encode(csv.encode()).decode()
        href = f'<a href="data:file/csv;base64,{b64}" download="results.csv">Download csv file</a>'
        st.markdown(href, unsafe_allow_html=True)

st.title("About")
st.subheader("You can tag your input CSV file of theses and dissertations with Library Science, Archival Studies, and Information Science categories. The screen will show the output.")
|
|
|
|
|
|