import streamlit as st
import pandas as pd
import pickle
import requests
import base64

# Streamlit app: upload a CSV of records, predict a category for every value
# in its "Abstract" column with a pickled classifier, and offer the tagged
# table as a CSV download.

#===config===
# set_page_config must stay the first Streamlit command the script executes.
st.set_page_config(
    page_title="ETDs Tagging",
    page_icon="",
    layout="wide"
)
st.header("Tagging Categories")
st.subheader('Put your file here...')

# Seconds to wait on the model host before a download attempt is abandoned.
REQUEST_TIMEOUT = 60


#========unique id========
@st.cache_resource(ttl=3600)
def create_list():
    """Return the cached list whose first element is used as a run counter.

    The list is stored with ``st.cache_resource``, so callers receive the
    cached object itself and in-place changes persist until the TTL expires.
    """
    return [1, 2, 3]


l = create_list()
# Increment the cached counter in place so each script run sees a new value.
first_list_value = l[0]
l[0] = first_list_value + 1
uID = str(l[0])


@st.cache_data(ttl=3600)
def get_ext(uploaded_file):
    """Return the current ``uID`` concatenated with the uploaded file's name."""
    return uID + uploaded_file.name


def _load_pickle(url):
    """Download ``url`` and return the object unpickled from the response body.

    The payload is unpickled straight from memory; no scratch file is written,
    so concurrent downloads cannot overwrite each other's data.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.RequestException: on connection problems or timeout.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Fail with a clear HTTP error instead of trying to unpickle an error page.
    response.raise_for_status()
    # SECURITY: unpickling can execute arbitrary code. Only call this with
    # URLs whose content is fully trusted.
    return pickle.loads(response.content)


@st.cache_resource(ttl=3600)
def read_model(url):
    """Fetch and unpickle the classifier stored at ``url`` (cached for 1 h)."""
    return _load_pickle(url)


@st.cache_resource(ttl=3600)
def read_tf(url):
    """Fetch and unpickle the preprocessing object at ``url`` (cached for 1 h).

    Cached like ``read_model`` so the file is not downloaded again on every
    script rerun.
    """
    return _load_pickle(url)


svm_classifier = read_model("https://github.com/manika-lamba/ml/raw/main/category/model2.pkl")
preprocessing = read_tf("https://github.com/manika-lamba/ml/raw/main/category/preprocessing.pkl")


# Function to predict the category for a given abstract
def predict_category(abstract):
    """Return the classifier's prediction for a single abstract.

    The abstract is wrapped in a one-element list, passed through
    ``preprocessing.transform`` and then through ``svm_classifier.predict``;
    the result of ``predict`` is returned unchanged.
    """
    # NOTE(review): predict() presumably returns a one-element array, so each
    # "category" cell would hold an array rather than a bare label - confirm
    # whether the caller should unwrap it.
    abstract_preprocessed = preprocessing.transform([abstract])
    return svm_classifier.predict(abstract_preprocessed)


# Create sidebar
#===upload file===
@st.cache_data(ttl=3600)
def upload(file):
    """Read the module-level ``uploaded_file`` as a CSV and return the frame.

    NOTE(review): ``file`` is never read inside the function; presumably it
    only serves as the cache key - confirm against callers before changing.
    """
    return pd.read_csv(uploaded_file)


@st.cache_data(ttl=3600)
def conv_txt(extype):
    """Read the module-level ``uploaded_file`` as tab-separated text.

    Rows are split on ``'\\r'`` and the columns are renamed through
    ``col_dict``.

    NOTE(review): ``extype`` is never read inside the function (presumably a
    cache key), and ``col_dict`` is not defined in the visible part of this
    file - calling this raises NameError unless it is defined elsewhere.
    """
    papers = pd.read_csv(uploaded_file, sep='\t', lineterminator='\r')
    papers.rename(columns=col_dict, inplace=True)
    return papers


# Defined ahead of the top-level code that calls it: the script runs top to
# bottom, so a definition placed after the call site would not exist yet.
def download_csv(df):
    """Render a download button that serves ``df`` as ``data.csv``.

    The frame is serialised with ``DataFrame.to_csv()`` (index included) and
    encoded as UTF-8 before being handed to ``st.download_button``.
    """
    st.download_button(
        label="Download CSV file",
        data=df.to_csv().encode(),
        file_name="data.csv",
        mime="text/csv",
    )


#===Read data===
uploaded_file = st.file_uploader("Choose a file", type=['csv'])

if uploaded_file is not None:
    df = pd.read_csv(uploaded_file, encoding='latin-1')
    st.dataframe(df)

    if 'Abstract' not in df.columns:
        # Without this guard the tagging step fails with a bare KeyError.
        st.error('The uploaded file must contain an "Abstract" column.')
    else:
        # Tag the "Abstract" column with the corresponding categories
        df['category'] = df['Abstract'].apply(predict_category)
        st.dataframe(df)

        # Create download tab
        with st.sidebar:
            st.header("Download")
            download_csv(df)