manika07 committed on
Commit
0221f7c
·
1 Parent(s): a0f899e

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +109 -0
  2. model-topics.pkl +3 -0
  3. requirements.txt +28 -0
  4. tfidf-topics.pkl +3 -0
app.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import pickle
4
+ import requests
5
+ import base64
6
+
7
+ #===config===
8
+ st.set_page_config(
9
+ page_title="ETDs Tagging",
10
+ page_icon="",
11
+ layout="wide"
12
+ )
13
+ st.header("Tagging Categories")
14
+ st.subheader('Put your file here...')
15
+
16
+ #========unique id========
17
@st.cache_resource(ttl=3600)
def create_list():
    """Return the seed list ``[1, 2, 3]``.

    The function is wrapped in ``st.cache_resource`` with a one-hour TTL.
    ``st.cache_resource`` hands back the cached object itself rather than a
    copy, so in-place changes made by callers remain visible to later
    callers until the entry expires.
    """
    return [1, 2, 3]
21
+
22
# Bump the first slot of the cached list on every script run and use the
# new value, as a string, as this run's identifier.
l = create_list()
first_list_value = l[0]
l[0] += 1
uID = str(l[0])
26
+
27
@st.cache_data(ttl=3600)
def get_ext(uploaded_file):
    """Return the module-level ``uID`` concatenated with the file's name.

    Args:
        uploaded_file: object exposing a ``name`` string attribute.

    Returns:
        ``uID + uploaded_file.name``.
    """
    return uID + uploaded_file.name
31
+
32
+
33
@st.cache_resource(ttl=3600)
def read_model(url):
    """Download a pickled classifier from *url* and return the loaded object.

    The result is cached by ``st.cache_resource`` for one hour, so the
    download happens at most once per TTL window per distinct URL.

    Args:
        url: HTTP(S) location of the pickle file.

    Returns:
        The object produced by unpickling the response body.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.RequestException: on connection problems or timeout.
    """
    response = requests.get(url, timeout=60)
    # Fail loudly on 4xx/5xx instead of trying to unpickle an error page.
    response.raise_for_status()
    # SECURITY: unpickling executes arbitrary code embedded in the payload.
    # Only point this at URLs whose content is fully trusted.
    # Load straight from memory: no shared "temp.pkl" on disk, and no file
    # handle left open.
    return pickle.loads(response.content)
52
+
53
+
54
@st.cache_resource(ttl=3600)
def read_tf(url):
    """Download a pickled preprocessing object from *url* and return it.

    Cached with ``st.cache_resource`` (one-hour TTL), matching
    ``read_model``, so the artefact is not fetched again on every rerun.

    Args:
        url: HTTP(S) location of the pickle file.

    Returns:
        The object produced by unpickling the response body.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.RequestException: on connection problems or timeout.
    """
    response = requests.get(url, timeout=60)
    # Fail loudly on 4xx/5xx instead of trying to unpickle an error page.
    response.raise_for_status()
    # SECURITY: unpickling executes arbitrary code embedded in the payload.
    # Only point this at URLs whose content is fully trusted.
    # Load straight from memory: no shared "temp.pkl" on disk, and no file
    # handle left open.
    return pickle.loads(response.content)
60
+
61
# Remote locations of the pickled classifier and preprocessing artefacts.
MODEL_URL = "https://github.com/manika-lamba/ml/raw/main/category/model2.pkl"
PREPROCESSING_URL = "https://github.com/manika-lamba/ml/raw/main/category/preprocessing.pkl"

svm_classifier = read_model(MODEL_URL)
preprocessing = read_tf(PREPROCESSING_URL)
63
+
64
+ # Function to predict the category for a given abstract
65
def predict_category(abstract):
    """Predict the category for a single abstract.

    The abstract is wrapped in a one-element list, run through the
    module-level ``preprocessing.transform``, and the result is passed to
    ``svm_classifier.predict``.

    Args:
        abstract: the abstract to classify.

    Returns:
        Whatever ``svm_classifier.predict`` returns for the one-row input;
        the caller in this file takes element ``[0]`` of it.
    """
    features = preprocessing.transform([abstract])
    return svm_classifier.predict(features)
71
+
72
+ # Create sidebar
73
+ #===upload file===
74
@st.cache_data(ttl=3600)
def upload(file):
    """Parse *file* as CSV and return the resulting DataFrame.

    Args:
        file: path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    # Read the argument itself, not the module-level ``uploaded_file``, so
    # the cached result always corresponds to the file that was passed in.
    return pd.read_csv(file)
78
+
79
@st.cache_data(ttl=3600)
def conv_txt(extype):
    """Read the module-level ``uploaded_file`` as tab-separated text.

    Lines are split on ``'\\r'`` and the columns are renamed through
    ``col_dict`` before the frame is returned.

    ``extype`` is not used in the body; presumably it only serves as the
    cache key -- TODO confirm.

    NOTE(review): ``col_dict`` is not defined in the visible part of this
    file, so a call would raise ``NameError`` unless it is supplied
    elsewhere -- confirm.
    """
    frame = pd.read_csv(uploaded_file, sep='\t', lineterminator='\r')
    return frame.rename(columns=col_dict)
84
+
85
+ # Read Data
86
+
87
uploaded_file = st.file_uploader("Choose a file", type=['csv'])
st.sidebar.header("Download Results")
st.sidebar.text("Download the tagged results as a CSV file.")

if uploaded_file is not None:
    df = pd.read_csv(uploaded_file, encoding='latin-1')
    st.dataframe(df)

    if 'Abstract' not in df.columns:
        # Show a readable message instead of letting the column lookup
        # below fail with a raw KeyError traceback.
        st.error('The uploaded CSV must contain an "Abstract" column.')
    else:
        # Empty cells are read as NaN (a float); turn them into empty
        # strings so the text pipeline always receives str input.
        abstracts = df['Abstract'].fillna('').astype(str)

        # Tag every abstract with its predicted category.
        df['category'] = abstracts.apply(lambda text: predict_category(text)[0])
        st.dataframe(df)

        # The UTF-8 encoded CSV bytes are passed to the download button
        # directly; the former Base64 encode/decode round trip returned
        # the very same bytes.
        csv = df.to_csv(index=False).encode('utf-8')

        st.sidebar.download_button(
            label="Download CSV",
            data=csv,
            file_name="results.csv",
            mime="text/csv",
            key='download-csv'
        )
model-topics.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d23020e255ce442b0a6e52932278b23cec8e798834aa3241fd2d376e20d743cf
3
+ size 1518
requirements.txt ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pip
2
+ streamlit==1.10.0
3
+ scikit-learn
4
+ gensim
5
+ matplotlib
6
+ nltk
7
+ numpy
8
+ pandas
9
+ pickleshare
10
+ plotly-express
11
+ plotly
12
+ pyldavis
13
+ regex
14
+ seaborn
15
+ setuptools
16
+ streamlit
17
+ streamlit-agraph
18
+ umap-learn
19
+ wordcloud
20
+ mlxtend
21
+ bertopic
22
+ ipywidgets==7.6.6
23
+ bitermplus
24
+ tmplot
25
+ tomotopy
26
+ pipeline
27
+ spacy==3.5.0
28
+ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.5.0/en_core_web_sm-3.5.0-py3-none-any.whl
tfidf-topics.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eeacc5c20b8f1ff2451d7395598645fc81dc1585ea5e7d3aa5251ec0620656c0
3
+ size 553208