manika07 committed
Commit ed9dc6d · Parent: 5425a1c

Upload 3 files

Files changed (3):
  1. .streamlit/config.toml +2 -0
  2. Home.py +54 -0
  3. pages/ml.py +92 -0
.streamlit/config.toml ADDED
@@ -0,0 +1,2 @@
+[theme]
+base="light"
Home.py ADDED
@@ -0,0 +1,54 @@
+#import module
+import streamlit as st
+from PIL import Image
+
+#===config===
+st.set_page_config(
+    page_title="ETDs Tagging",
+    page_icon="",
+    layout="wide"
+)
+st.title('ETDs Tagging Tool')
+st.sidebar.success('Select page above')
+
+#===page===
+mt1, mt2, mt3 = st.tabs(["About", "How to", "Behind this app"])
+
+with mt1:
+    st.header("🌌 Hello ")
+    st.write('You can tag your input CSV file of theses and dissertations with Library Science, Archival Studies, and Information Science categories. The screen will show the output.')
+    st.text('')
+    st.text('')
+    st.text('')
+    st.text('')
+    st.divider()
+    st.error("This app works on a CSV file having an 'Abstract' field", icon="🚨")
+
+with mt2:
+    st.header("Before you start")
+    option = st.selectbox(
+        'Please choose....',
+        ('Tagging Categories', 'Tagging Sub-Categories'))
+
+    if option == 'Tagging Categories':
+        tab1 = st.tabs(["Prologue"])[0]
+        with tab1:
+            st.text("""
+            +-------+---------------------+
+            | S.No. | Category name       |
+            +-------+---------------------+
+            | 1     | Library Science     |
+            +-------+---------------------+
+            | 2     | Information Science |
+            +-------+---------------------+
+            | 3     | Archival Studies    |
+            +-------+---------------------+
+            """)
+
+with mt3:
+    st.header('Behind this app')
+    st.subheader('Dr. Manika Lamba')
+    st.text('Elected Standing Committee Member & Chair of Professional Development Sub-Committee at IFLA STL Section | Editor-in-Chief for IJLIS (IGI Global).')
+    st.text('')
+    st.text('')
+    st.divider()
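
For context, the tagging page added below expects the uploaded CSV to contain an 'Abstract' column. A minimal sketch of such an input file; the 'Title' column and the text values are illustrative, not from this commit:

    import pandas as pd

    # One row per thesis/dissertation; the classifier only reads 'Abstract'.
    sample = pd.DataFrame({
        "Title": ["Open access adoption in university repositories"],
        "Abstract": ["This thesis examines how academic libraries manage open-access policies ..."],
    })
    sample.to_csv("sample_etds.csv", index=False)
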
pages/ml.py ADDED
@@ -0,0 +1,92 @@
+import streamlit as st
+import pandas as pd
+import pickle
+import requests
+import base64
+
+#===config===
+st.set_page_config(
+    page_title="ETDs Tagging",
+    page_icon="",
+    layout="wide"
+)
+st.header("Tagging Categories")
+st.subheader('Put your file here...')
+
+#========unique id========
+@st.cache_resource(ttl=3600)
+def create_list():
+    l = [1, 2, 3]
+    return l
+
+l = create_list()
+first_list_value = l[0]
+l[0] = first_list_value + 1
+uID = str(l[0])
+
+@st.cache_data(ttl=3600)
+def get_ext(uploaded_file):
+    extype = uID+uploaded_file.name
+    return extype
+
+
+
+@st.cache_resource(ttl=3600)
+def read_model(url):
+    response = requests.get(url)
+    open("temp.pkl", "wb").write(response.content)
+    with open("temp.pkl", "rb") as f:
+        svm_classifier = pickle.load(f)
+    return svm_classifier
+
+
+def read_tf(url):
+    response = requests.get(url)
+    open("temp.pkl", "wb").write(response.content)
+    with open("temp.pkl", "rb") as f:
+        preprocessing = pickle.load(f)
+    return preprocessing
+
+svm_classifier = read_model("https://github.com/manika-lamba/ml/raw/main/model2.pkl")
+preprocessing = read_tf("https://github.com/manika-lamba/ml/raw/main/preprocessing.pkl")
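
An aside on the two loaders above: a minimal alternative sketch that unpickles from memory instead of writing a shared temp.pkl on disk. load_pickle_from_url is a hypothetical helper, not part of this commit; the URLs are the ones used above:

    import io
    import pickle
    import requests
    import streamlit as st

    @st.cache_resource(ttl=3600)
    def load_pickle_from_url(url):
        # Fetch the pickled object and deserialize it in memory,
        # so successive loads do not overwrite the same file on disk.
        response = requests.get(url)
        response.raise_for_status()
        return pickle.load(io.BytesIO(response.content))

    # svm_classifier = load_pickle_from_url("https://github.com/manika-lamba/ml/raw/main/model2.pkl")
    # preprocessing = load_pickle_from_url("https://github.com/manika-lamba/ml/raw/main/preprocessing.pkl")
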
+
+# Function to predict the category for a given abstract
+def predict_category(abstract):
+    # Preprocess the abstract
+    abstract_preprocessed = preprocessing.transform([abstract])
+    # Make prediction and return the single predicted label
+    prediction = svm_classifier.predict(abstract_preprocessed)
+    return prediction[0]
+
+# Create sidebar
+#===upload file===
+@st.cache_data(ttl=3600)
+def upload(file):
+    papers = pd.read_csv(file)
+    return papers
+
+@st.cache_data(ttl=3600)
+def conv_txt(extype):
+    papers = pd.read_csv(uploaded_file, sep='\t', lineterminator='\r')
+    papers.rename(columns=col_dict, inplace=True)
+    return papers
+
+#===Read data===
+uploaded_file = st.file_uploader("Choose a file", type=['csv'])
+
+if uploaded_file is not None:
+    df = pd.read_csv(uploaded_file, encoding='latin-1')
+    st.dataframe(df)
+    # Tag the "Abstract" column with the corresponding categories
+    df['category'] = df['Abstract'].apply(predict_category)
+    st.dataframe(df)
+
+    st.sidebar.header("Download Results")
+    st.sidebar.text("Download the tagged results as a CSV file.")
+
+    # Create a download button
+    if st.sidebar.button("Download"):
+        csv = df.to_csv(index=False)
+        b64 = base64.b64encode(csv.encode()).decode()
+        href = f'<a href="data:file/csv;base64,{b64}" download="results.csv">Download csv file</a>'
+        st.markdown(href, unsafe_allow_html=True)
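
A possible simplification of the base64 download link at the end of pages/ml.py: Streamlit's built-in st.download_button can serve the tagged CSV directly. A minimal sketch, assuming it replaces the sidebar button inside the "if uploaded_file is not None:" block, where df is the tagged DataFrame from above:

    # Streams the CSV to the browser without manual base64 encoding.
    csv_bytes = df.to_csv(index=False).encode("utf-8")
    st.sidebar.download_button(
        label="Download",
        data=csv_bytes,
        file_name="results.csv",
        mime="text/csv",
    )
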