Spaces:

manika07
/

topics

Sleeping

App Files Community

manika07 committed on Jan 10, 2024

Commit

cd23ce3

1 Parent(s): d61c88f

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -75

app.py CHANGED Viewed

@@ -4,45 +4,7 @@ import pickle
 import requests
 import base64
-#===config===
-st.set_page_config(
-     page_title="ETDs Tagging",
-     page_icon="",
-     layout="wide"
-)
-st.header("Tagging Categories")
-st.subheader('Put your file here...')
-#========unique id========
-@st.cache_resource(ttl=3600)
-def create_list():
-    l = [1, 2, 3]
-    return l
-l = create_list()
-first_list_value = l[0]
-l[0] = first_list_value + 1
-uID = str(l[0])
-@st.cache_data(ttl=3600)
-def get_ext(uploaded_file):
-    extype = uID+uploaded_file.name
-    return extype
-@st.cache_resource(ttl=3600)
-# def read_model(local_path):
-#     with open(local_path, "rb") as f:
-#         svm_classifier = pickle.load(f)
-#     return svm_classifier
-# def read_tf(local_path):
-#     with open(local_path, "rb") as f:
-#         preprocessing = pickle.load(f)
-#     return preprocessing
 def read_model(url):
     response = requests.get(url)
     open("temp.pkl", "wb").write(response.content)
@@ -58,53 +20,39 @@ def read_tf(url):
         preprocessing = pickle.load(f)
     return preprocessing
-svm_classifier = read_model("https://github.com/manika-lamba/ml/raw/main/model-topics.pkl")
-preprocessing = read_tf("https://github.com/manika-lamba/ml/raw/main/tfidf-topics.pkl")
-# Function to predict the topic for a given abstract
-def predict_topic (abstract):
     # Preprocess the abstract
     abstract_preprocessed = preprocessing.transform([abstract])
     # Make prediction
-    prediction = svm_classifier(abstract_preprocessed)
     return prediction
 # Create sidebar
-#===upload file===
-@st.cache_data(ttl=3600)
-def upload(file):
-    papers = pd.read_csv(uploaded_file)
-    return papers
-@st.cache_data(ttl=3600)
-def conv_txt(extype):
-    papers = pd.read_csv(uploaded_file, sep='\t', lineterminator='\r')
-    papers.rename(columns=col_dict, inplace=True)
-    return papers
-# Read Data
-uploaded_file = st.file_uploader("Choose a file", type=['csv'])
-st.sidebar.header("Download Results")
-st.sidebar.text("Download the tagged results as a CSV file.")
 if uploaded_file is not None:
     df = pd.read_csv(uploaded_file, encoding='latin-1')
     st.dataframe(df)
-    # Tag the "Abstract" column with the corresponding topics
-    df['topic'] = df['Abstract'].apply(lambda x: predict_topic(x)[0])
     st.dataframe(df)
-    # Convert DataFrame to CSV and encode it properly
-    csv = df.to_csv(index=False).encode('utf-8')
-    b64 = base64.b64encode(csv).decode()  # Decode the Base64 encoded bytes
-    st.sidebar.download_button(
-        label="Download CSV",
-        data=base64.b64decode(b64),  # Decode Base64 to bytes
-        file_name="results.csv",
-        mime="text/csv",
-        key='download-csv'
-    )

 import requests
 import base64
+@st.cache
 def read_model(url):
     response = requests.get(url)
     open("temp.pkl", "wb").write(response.content)
         preprocessing = pickle.load(f)
     return preprocessing
+svm_classifier = read_model("https://github.com/manika-lamba/ml/blob/main/model-topics.pkl")
+preprocessing = read_tf("https://github.com/manika-lamba/ml/blob/main/tfidf-topics.pkl")
+# Function to predict the category for a given abstract
+def predict_category(abstract):
     # Preprocess the abstract
     abstract_preprocessed = preprocessing.transform([abstract])
     # Make prediction
+    prediction = svm_classifier.predict(abstract_preprocessed)
     return prediction
 # Create sidebar
+# Create tab for choosing CSV file
+st.sidebar.header("Choose CSV File with 'Abstract' field")
+uploaded_file = st.sidebar.file_uploader("", type=["csv"])
 if uploaded_file is not None:
     df = pd.read_csv(uploaded_file, encoding='latin-1')
     st.dataframe(df)
+    # Tag the "Abstract" column with the corresponding categories
+    df['category'] = df['Abstract'].apply(predict_category)
     st.dataframe(df)
+st.sidebar.header("Download Results")
+st.sidebar.text("Download the tagged results as a CSV file.")
+# Create a download button
+if st.sidebar.button("Download"):
+    csv = df.to_csv(index=False)
+    b64 = base64.b64encode(csv.encode()).decode()
+    href = f'<a href="data:file/csv;base64,{b64}" download="results.csv">Download csv file</a>'
+    st.markdown(href, unsafe_allow_html=True)
+st.title("About")
+st.subheader("You can tag your input CSV file of theses and dissertations with Library Science, Archival Studies, and Information Science categories. The screen will show the output.")