manika07 committed on
Commit
bb0a66f
·
1 Parent(s): 02b57f8

first commit

Browse files
Files changed (4) hide show
  1. model2.pkl +3 -0
  2. preprocessing.pkl +3 -0
  3. requirements.txt +5 -0
  4. streamlit_app.py +58 -0
model2.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fd47b1a0097fe8a96af6c32871937954156feda2b4f9c797b23da1a4b3c696a
3
+ size 444751
preprocessing.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:545b23fd00081f8a659fbb92e508eb672d69ba4e0f8436c27286c026ad7574f0
3
+ size 553156
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ pickle
4
+ requests
5
+ base64
streamlit_app.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import pickle
4
+ import requests
5
+ import base64
6
+
7
@st.cache
def read_model(url):
    """Download a pickled classifier from *url* and return the loaded object.

    The response body is unpickled directly from memory, so no temporary
    file is written and no file handle is left open.

    Args:
        url: HTTP(S) address of the pickle file.

    Returns:
        The object stored in the downloaded pickle.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection failures or timeouts.
    """
    # NOTE(review): ``st.cache`` is deprecated in newer Streamlit releases in
    # favour of ``st.cache_resource``; the decorator is kept unchanged so the
    # required Streamlit version does not change.
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of trying to unpickle an error page.
    response.raise_for_status()
    # SECURITY: unpickling can execute arbitrary code; only point this at
    # URLs whose content is trusted.
    svm_classifier = pickle.loads(response.content)
    return svm_classifier
14
+
15
+
16
def read_tf(url):
    """Download a pickled preprocessing object from *url* and return it.

    The response body is unpickled directly from memory, so no temporary
    file is written and no file handle is left open.

    Args:
        url: HTTP(S) address of the pickle file.

    Returns:
        The object stored in the downloaded pickle.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection failures or timeouts.
    """
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of trying to unpickle an error page.
    response.raise_for_status()
    # SECURITY: unpickling can execute arbitrary code; only point this at
    # URLs whose content is trusted.
    preprocessing = pickle.loads(response.content)
    return preprocessing
22
+
23
# Remote locations of the pickled artefacts.
MODEL_URL = "https://github.com/manika-lamba/ml/raw/main/model2.pkl"
PREPROCESSING_URL = "https://github.com/manika-lamba/ml/raw/main/preprocessing.pkl"

# Load both objects when the script runs; predict_category() reads these
# module-level names.
svm_classifier = read_model(MODEL_URL)
preprocessing = read_tf(PREPROCESSING_URL)
25
+
26
# Predict the category for a single abstract.
def predict_category(abstract):
    """Run one abstract through ``preprocessing.transform`` and classify it.

    Args:
        abstract: The abstract to classify.

    Returns:
        The result of ``svm_classifier.predict`` for the one-item batch.
    """
    # Wrap the abstract in a list so ``transform`` receives a one-item batch.
    features = preprocessing.transform([abstract])
    return svm_classifier.predict(features)
33
+
34
# Sidebar: choose the CSV file to tag.
st.sidebar.header("Choose CSV File with 'Abstract' field")
uploaded_file = st.sidebar.file_uploader("", type=["csv"])

if uploaded_file is not None:
    df = pd.read_csv(uploaded_file, encoding='latin-1')
    st.dataframe(df)

    if 'Abstract' not in df.columns:
        # Without this guard the column lookup below raises KeyError and the
        # user sees a raw traceback instead of an actionable message.
        st.error("The uploaded CSV file has no 'Abstract' column.")
    else:
        # Tag the "Abstract" column with the corresponding categories.
        # NOTE(review): predict_category returns the raw predict() result for
        # a one-item batch, so each cell may hold a length-1 array rather
        # than a scalar label -- confirm this is the intended output.
        df['category'] = df['Abstract'].apply(predict_category)
        st.dataframe(df)

        # The download controls read ``df``, so they are only rendered once
        # a file has been uploaded and tagged.
        st.sidebar.header("Download Results")
        st.sidebar.text("Download the tagged results as a CSV file.")

        # Create a download button
        if st.sidebar.button("Download"):
            csv_text = df.to_csv(index=False)
            b64 = base64.b64encode(csv_text.encode()).decode()
            # "text/csv" is the registered media type for CSV data.
            href = f'<a href="data:text/csv;base64,{b64}" download="results.csv">Download csv file</a>'
            st.markdown(href, unsafe_allow_html=True)

st.title("About")
st.subheader("You can tag your input CSV file of theses and dissertations with Library Science, Archival Studies, and Information Science categories. The screen will show the output.")