Update app.py
Browse files
app.py
CHANGED
@@ -4,45 +4,7 @@ import pickle
|
|
4 |
import requests
|
5 |
import base64
|
6 |
|
7 |
-
|
8 |
-
st.set_page_config(
|
9 |
-
page_title="ETDs Tagging",
|
10 |
-
page_icon="",
|
11 |
-
layout="wide"
|
12 |
-
)
|
13 |
-
st.header("Tagging Categories")
|
14 |
-
st.subheader('Put your file here...')
|
15 |
-
|
16 |
-
#========unique id========
|
17 |
-
@st.cache_resource(ttl=3600)
|
18 |
-
def create_list():
|
19 |
-
l = [1, 2, 3]
|
20 |
-
return l
|
21 |
-
|
22 |
-
l = create_list()
|
23 |
-
first_list_value = l[0]
|
24 |
-
l[0] = first_list_value + 1
|
25 |
-
uID = str(l[0])
|
26 |
-
|
27 |
-
@st.cache_data(ttl=3600)
|
28 |
-
def get_ext(uploaded_file):
|
29 |
-
extype = uID+uploaded_file.name
|
30 |
-
return extype
|
31 |
-
|
32 |
-
|
33 |
-
@st.cache_resource(ttl=3600)
|
34 |
-
# def read_model(local_path):
|
35 |
-
# with open(local_path, "rb") as f:
|
36 |
-
# svm_classifier = pickle.load(f)
|
37 |
-
# return svm_classifier
|
38 |
-
|
39 |
-
|
40 |
-
# def read_tf(local_path):
|
41 |
-
# with open(local_path, "rb") as f:
|
42 |
-
# preprocessing = pickle.load(f)
|
43 |
-
# return preprocessing
|
44 |
-
|
45 |
-
|
46 |
def read_model(url):
|
47 |
response = requests.get(url)
|
48 |
open("temp.pkl", "wb").write(response.content)
|
@@ -58,53 +20,39 @@ def read_tf(url):
|
|
58 |
preprocessing = pickle.load(f)
|
59 |
return preprocessing
|
60 |
|
61 |
-
svm_classifier = read_model("https://github.com/manika-lamba/ml/
|
62 |
-
preprocessing = read_tf("https://github.com/manika-lamba/ml/
|
63 |
|
64 |
-
|
65 |
-
|
66 |
-
def predict_topic (abstract):
|
67 |
# Preprocess the abstract
|
68 |
abstract_preprocessed = preprocessing.transform([abstract])
|
69 |
# Make prediction
|
70 |
-
prediction = svm_classifier(abstract_preprocessed)
|
71 |
return prediction
|
72 |
|
73 |
# Create sidebar
|
74 |
-
#===upload file===
|
75 |
-
@st.cache_data(ttl=3600)
|
76 |
-
def upload(file):
|
77 |
-
papers = pd.read_csv(uploaded_file)
|
78 |
-
return papers
|
79 |
-
|
80 |
-
@st.cache_data(ttl=3600)
|
81 |
-
def conv_txt(extype):
|
82 |
-
papers = pd.read_csv(uploaded_file, sep='\t', lineterminator='\r')
|
83 |
-
papers.rename(columns=col_dict, inplace=True)
|
84 |
-
return papers
|
85 |
|
86 |
-
#
|
87 |
-
|
88 |
-
uploaded_file = st.file_uploader("
|
89 |
-
st.sidebar.header("Download Results")
|
90 |
-
st.sidebar.text("Download the tagged results as a CSV file.")
|
91 |
|
92 |
if uploaded_file is not None:
|
93 |
df = pd.read_csv(uploaded_file, encoding='latin-1')
|
94 |
st.dataframe(df)
|
95 |
-
|
96 |
-
|
97 |
-
df['topic'] = df['Abstract'].apply(lambda x: predict_topic(x)[0])
|
98 |
st.dataframe(df)
|
99 |
-
|
100 |
-
# Convert DataFrame to CSV and encode it properly
|
101 |
-
csv = df.to_csv(index=False).encode('utf-8')
|
102 |
-
b64 = base64.b64encode(csv).decode() # Decode the Base64 encoded bytes
|
103 |
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
4 |
import requests
|
5 |
import base64
|
6 |
|
7 |
+
@st.cache
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
def read_model(url):
|
9 |
response = requests.get(url)
|
10 |
open("temp.pkl", "wb").write(response.content)
|
|
|
20 |
preprocessing = pickle.load(f)
|
21 |
return preprocessing
|
22 |
|
23 |
+
# Load the pickled classifier and the pickled pre-processing object at start-up.
# GitHub's ".../blob/..." address serves the HTML file-viewer page, not the
# file's bytes, so the pickles are requested through ".../raw/..." instead.
# NOTE(review): unpickling downloaded data can execute arbitrary code; this is
# only acceptable while the repository below is fully trusted.
svm_classifier = read_model("https://github.com/manika-lamba/ml/raw/main/model-topics.pkl")
preprocessing = read_tf("https://github.com/manika-lamba/ml/raw/main/tfidf-topics.pkl")
|
25 |
|
26 |
+
def predict_category(abstract):
    """Predict the category for a single abstract.

    The abstract is wrapped in a one-element list, passed through the
    module-level ``preprocessing.transform``, and the transformed result is
    given to ``svm_classifier.predict``.

    Returns:
        Whatever ``svm_classifier.predict`` returns for that one-item batch
        (presumably a length-1 array of labels -- confirm against the model).
    """
    features = preprocessing.transform([abstract])
    return svm_classifier.predict(features)
|
33 |
|
34 |
# Create sidebar
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
|
36 |
+
# Sidebar section where the user picks the CSV file to tag.
st.sidebar.header("Choose CSV File with 'Abstract' field")
# Streamlit discourages an empty widget label (accessibility). The header above
# already describes the widget, so a real label is supplied but kept hidden.
uploaded_file = st.sidebar.file_uploader(
    "CSV file with an 'Abstract' field",
    type=["csv"],
    label_visibility="collapsed",
)
|
|
|
|
|
39 |
|
40 |
if uploaded_file is not None:
    # Show the uploaded table as-is before tagging.
    df = pd.read_csv(uploaded_file, encoding='latin-1')
    st.dataframe(df)

    if 'Abstract' not in df.columns:
        # Without this check the lookup below fails with a bare KeyError.
        st.error("The uploaded CSV file has no 'Abstract' column.")
    else:
        # Tag the "Abstract" column with the corresponding categories.
        # predict_category returns the result for a one-item batch; take its
        # single element so each cell holds the label itself rather than a
        # one-element container.
        df['category'] = df['Abstract'].apply(
            lambda text: predict_category(text)[0]
        )
        st.dataframe(df)
|
|
|
|
|
|
|
|
|
46 |
|
47 |
+
st.sidebar.header("Download Results")
st.sidebar.text("Download the tagged results as a CSV file.")

# Create a download button
if st.sidebar.button("Download"):
    if uploaded_file is None:
        # `df` only exists once a file has been uploaded; without this guard a
        # click before uploading would fail on the undefined name.
        st.sidebar.warning("Please upload a CSV file first.")
    else:
        # Embed the CSV in a base64 data URI so the browser can save it
        # without a server round-trip. "text/csv" is the registered media
        # type for CSV.
        csv_data = df.to_csv(index=False)
        b64 = base64.b64encode(csv_data.encode()).decode()
        href = f'<a href="data:text/csv;base64,{b64}" download="results.csv">Download csv file</a>'
        st.markdown(href, unsafe_allow_html=True)
|
56 |
+
|
57 |
+
# "About" section: a title followed by a short description of the app.
ABOUT_TEXT = (
    "You can tag your input CSV file of theses and dissertations with "
    "Library Science, Archival Studies, and Information Science categories. "
    "The screen will show the output."
)
st.title("About")
st.subheader(ABOUT_TEXT)
|