manika07 committed on
Commit
cd23ce3
·
1 Parent(s): d61c88f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -75
app.py CHANGED
@@ -4,45 +4,7 @@ import pickle
4
  import requests
5
  import base64
6
 
7
- #===config===
8
- st.set_page_config(
9
- page_title="ETDs Tagging",
10
- page_icon="",
11
- layout="wide"
12
- )
13
- st.header("Tagging Categories")
14
- st.subheader('Put your file here...')
15
-
16
- #========unique id========
17
- @st.cache_resource(ttl=3600)
18
- def create_list():
19
- l = [1, 2, 3]
20
- return l
21
-
22
- l = create_list()
23
- first_list_value = l[0]
24
- l[0] = first_list_value + 1
25
- uID = str(l[0])
26
-
27
- @st.cache_data(ttl=3600)
28
- def get_ext(uploaded_file):
29
- extype = uID+uploaded_file.name
30
- return extype
31
-
32
-
33
- @st.cache_resource(ttl=3600)
34
- # def read_model(local_path):
35
- # with open(local_path, "rb") as f:
36
- # svm_classifier = pickle.load(f)
37
- # return svm_classifier
38
-
39
-
40
- # def read_tf(local_path):
41
- # with open(local_path, "rb") as f:
42
- # preprocessing = pickle.load(f)
43
- # return preprocessing
44
-
45
-
46
  def read_model(url):
47
  response = requests.get(url)
48
  open("temp.pkl", "wb").write(response.content)
@@ -58,53 +20,39 @@ def read_tf(url):
58
  preprocessing = pickle.load(f)
59
  return preprocessing
60
 
61
- svm_classifier = read_model("https://github.com/manika-lamba/ml/raw/main/model-topics.pkl")
62
- preprocessing = read_tf("https://github.com/manika-lamba/ml/raw/main/tfidf-topics.pkl")
63
 
64
-
65
- # Function to predict the topic for a given abstract
66
- def predict_topic (abstract):
67
  # Preprocess the abstract
68
  abstract_preprocessed = preprocessing.transform([abstract])
69
  # Make prediction
70
- prediction = svm_classifier(abstract_preprocessed)
71
  return prediction
72
 
73
  # Create sidebar
74
- #===upload file===
75
- @st.cache_data(ttl=3600)
76
- def upload(file):
77
- papers = pd.read_csv(uploaded_file)
78
- return papers
79
-
80
- @st.cache_data(ttl=3600)
81
- def conv_txt(extype):
82
- papers = pd.read_csv(uploaded_file, sep='\t', lineterminator='\r')
83
- papers.rename(columns=col_dict, inplace=True)
84
- return papers
85
 
86
- # Read Data
87
-
88
- uploaded_file = st.file_uploader("Choose a file", type=['csv'])
89
- st.sidebar.header("Download Results")
90
- st.sidebar.text("Download the tagged results as a CSV file.")
91
 
92
  if uploaded_file is not None:
93
  df = pd.read_csv(uploaded_file, encoding='latin-1')
94
  st.dataframe(df)
95
-
96
- # Tag the "Abstract" column with the corresponding topics
97
- df['topic'] = df['Abstract'].apply(lambda x: predict_topic(x)[0])
98
  st.dataframe(df)
99
-
100
- # Convert DataFrame to CSV and encode it properly
101
- csv = df.to_csv(index=False).encode('utf-8')
102
- b64 = base64.b64encode(csv).decode() # Decode the Base64 encoded bytes
103
 
104
- st.sidebar.download_button(
105
- label="Download CSV",
106
- data=base64.b64decode(b64), # Decode Base64 to bytes
107
- file_name="results.csv",
108
- mime="text/csv",
109
- key='download-csv'
110
- )
 
 
 
 
 
 
4
  import requests
5
  import base64
6
 
7
+ @st.cache
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  def read_model(url):
9
  response = requests.get(url)
10
  open("temp.pkl", "wb").write(response.content)
 
20
  preprocessing = pickle.load(f)
21
  return preprocessing
22
 
23
+ svm_classifier = read_model("https://github.com/manika-lamba/ml/blob/main/model-topics.pkl")
24
+ preprocessing = read_tf("https://github.com/manika-lamba/ml/blob/main/tfidf-topics.pkl")
25
 
26
+ # Function to predict the category for a given abstract
27
+ def predict_category(abstract):
 
28
  # Preprocess the abstract
29
  abstract_preprocessed = preprocessing.transform([abstract])
30
  # Make prediction
31
+ prediction = svm_classifier.predict(abstract_preprocessed)
32
  return prediction
33
 
34
  # Create sidebar
 
 
 
 
 
 
 
 
 
 
 
35
 
36
+ # Create tab for choosing CSV file
37
+ st.sidebar.header("Choose CSV File with 'Abstract' field")
38
+ uploaded_file = st.sidebar.file_uploader("", type=["csv"])
 
 
39
 
40
  if uploaded_file is not None:
41
  df = pd.read_csv(uploaded_file, encoding='latin-1')
42
  st.dataframe(df)
43
+ # Tag the "Abstract" column with the corresponding categories
44
+ df['category'] = df['Abstract'].apply(predict_category)
 
45
  st.dataframe(df)
 
 
 
 
46
 
47
+ st.sidebar.header("Download Results")
48
+ st.sidebar.text("Download the tagged results as a CSV file.")
49
+
50
+ # Create a download button
51
+ if st.sidebar.button("Download"):
52
+ csv = df.to_csv(index=False)
53
+ b64 = base64.b64encode(csv.encode()).decode()
54
+ href = f'<a href="data:file/csv;base64,{b64}" download="results.csv">Download csv file</a>'
55
+ st.markdown(href, unsafe_allow_html=True)
56
+
57
+ st.title("About")
58
+ st.subheader("You can tag your input CSV file of theses and dissertations with Library Science, Archival Studies, and Information Science categories. The screen will show the output.")