Spaces:

ashhadahsan
/

summarizer-space

Running

App Files Files Community

ashhadahsan committed on Mar 1, 2023

Commit

4e736ad

1 Parent(s): 8f190f9

added the classification model

Browse files

Files changed (1) hide show

app.py +108 -10

app.py CHANGED Viewed

@@ -4,18 +4,44 @@ from transformers import pipeline
 from stqdm import stqdm
 from simplet5 import SimpleT5
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
-@st.cache
 def load_t5():
     model = AutoModelForSeq2SeqLM.from_pretrained("t5-base")
     tokenizer = AutoTokenizer.from_pretrained("t5-base")
     return model, tokenizer
-@st.cache
 def custom_model():
     return pipeline("summarization", model="my_awesome_sum/")
@@ -25,11 +51,20 @@ def convert_df(df):
     return df.to_csv(index=False).encode("utf-8")
-@st.cache
 def load_one_line_summarizer(model):
     return model.load_model("t5", "snrspeaks/t5-one-line-summary")
 st.set_page_config(layout="wide", page_title="Amazon Review Summarizer")
 st.title("Amazon Review Summarizer")
@@ -38,6 +73,7 @@ summarizer_option = st.selectbox(
     "Select Summarizer",
     ("Custom trained on the dataset", "t5-base", "t5-one-line-summary"),
 )
 hide_streamlit_style = """
             <style>
             #MainMenu {visibility: hidden;}
@@ -63,8 +99,7 @@ if st.button("Process"):
             text = df["text"].values.tolist()
             if summarizer_option == "Custom trained on the dataset":
                 model = custom_model()
-                print(summarizer_option)
                 progress_text = "Summarization in progress. Please wait."
                 summary = []
@@ -82,11 +117,31 @@ if st.button("Process"):
                 output = pd.DataFrame(
                     {"text": df["text"].values.tolist(), "summary": summary}
                 )
                 csv = convert_df(output)
                 st.download_button(
                     label="Download data as CSV",
                     data=csv,
-                    file_name=f"{summarizer_option}_df.csv",
                     mime="text/csv",
                 )
             if summarizer_option == "t5-base":
@@ -115,11 +170,31 @@ if st.button("Process"):
                 output = pd.DataFrame(
                     {"text": df["text"].values.tolist(), "summary": summary}
                 )
                 csv = convert_df(output)
                 st.download_button(
                     label="Download data as CSV",
                     data=csv,
-                    file_name=f"{summarizer_option}_df.csv",
                     mime="text/csv",
                 )
@@ -136,16 +211,39 @@ if st.button("Process"):
                 output = pd.DataFrame(
                     {"text": df["text"].values.tolist(), "summary": summary}
                 )
                 csv = convert_df(output)
                 st.download_button(
                     label="Download data as CSV",
                     data=csv,
-                    file_name=f"{summarizer_option}_df.csv",
                     mime="text/csv",
                 )
         except KeyError:
             st.error(
                 "Please Make sure that your data must have a column named text",
                 icon="🚨",
             )
             st.info("Text column must have amazon reviews", icon="ℹ️")

 from stqdm import stqdm
 from simplet5 import SimpleT5
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+from transformers import BertTokenizer
+from tensorflow.keras.models import load_model
+from tensorflow.nn import softmax
+import numpy as np
+from datetime import datetime
+import logging
+# Date stamp (YYYY-MM-DD) taken when this line runs; it is embedded in the
+# file names of the CSV download buttons.
+date = datetime.now().strftime(r"%Y-%m-%d")
+# Maps the integer index chosen by np.argmax over the classifier logits to
+# the category label written to the "category" output column.
+model_classes = {
+    0: "Ads",
+    1: "Apps",
+    2: "Battery",
+    3: "Charging",
+    4: "Delivery",
+    5: "Display",
+    6: "FOS",
+    7: "HW",
+    8: "Order",
+    9: "Refurb",
+    10: "SD",
+    11: "Setup",
+    12: "Unknown",
+    13: "WiFi",
+}
+@st.cache_resource
 def load_t5():
+    """Load the pretrained ``t5-base`` seq2seq model and its tokenizer.
+
+    Decorated with ``st.cache_resource``. A success banner is shown after
+    both objects have been loaded.
+
+    Returns:
+        A ``(model, tokenizer)`` tuple.
+    """
     model = AutoModelForSeq2SeqLM.from_pretrained("t5-base")
     tokenizer = AutoTokenizer.from_pretrained("t5-base")
+    st.success("Loaded T5 Model")
     return model, tokenizer
+@st.cache_resource
 def custom_model():
+    """Build the summarization pipeline for the custom-trained model.
+
+    Decorated with ``st.cache_resource``.
+
+    Returns:
+        The ``transformers`` ``pipeline("summarization", ...)`` object
+        created from ``my_awesome_sum/`` (presumably a local model
+        directory -- TODO confirm).
+    """
+    # NOTE(review): the success banner is emitted before the pipeline is
+    # constructed, so it is shown even if loading fails on the next line.
+    st.success("Loaded custom model")
     return pipeline("summarization", model="my_awesome_sum/")
     return df.to_csv(index=False).encode("utf-8")
+@st.cache_resource
 def load_one_line_summarizer(model):
+    """Load the ``snrspeaks/t5-one-line-summary`` weights through ``model``.
+
+    Decorated with ``st.cache_resource``.
+
+    Args:
+        model: Object exposing ``load_model(model_type, model_name)``
+            (presumably a ``SimpleT5`` instance -- verify against caller).
+
+    Returns:
+        Whatever ``model.load_model("t5", "snrspeaks/t5-one-line-summary")``
+        returns.
+    """
+    # NOTE(review): st.cache_resource derives its cache key from the
+    # arguments; confirm that `model` can be hashed by Streamlit, or that
+    # excluding it from hashing is not needed here.
+    # NOTE(review): the success banner is emitted before load_model() runs,
+    # so it is shown even if loading fails on the next line.
+    st.success("Loaded one line summarizer")
     return model.load_model("t5", "snrspeaks/t5-one-line-summary")
+@st.cache_resource
+def classify_category():
+    """Load the tokenizer and model used for category classification.
+
+    Decorated with ``st.cache_resource``. A success banner is shown after
+    both objects have been loaded.
+
+    Returns:
+        A ``(tokenizer, new_model)`` tuple: the ``bert-base-uncased``
+        ``BertTokenizer`` and the Keras model returned by
+        ``load_model("model")``.
+    """
+    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
+    new_model = load_model("model")
+    st.success("Loaded custom classification model")
+    return tokenizer, new_model
 st.set_page_config(layout="wide", page_title="Amazon Review Summarizer")
 st.title("Amazon Review Summarizer")
     "Select Summarizer",
     ("Custom trained on the dataset", "t5-base", "t5-one-line-summary"),
 )
+classification = st.checkbox("Classify Category", value=True)
 hide_streamlit_style = """
             <style>
             #MainMenu {visibility: hidden;}
             text = df["text"].values.tolist()
             if summarizer_option == "Custom trained on the dataset":
                 model = custom_model()
                 progress_text = "Summarization in progress. Please wait."
                 summary = []
                 output = pd.DataFrame(
                     {"text": df["text"].values.tolist(), "summary": summary}
                 )
+                # Optional step: add a "category" column predicted by the
+                # classification model when the checkbox is ticked.
+                if classification:
+                    classification_token, classification_model = classify_category()
+                    # Tokenize every review in one batch, padded/truncated to
+                    # at most 128 tokens, as TensorFlow tensors.
+                    tf_batch = classification_token(
+                        text,
+                        max_length=128,
+                        padding=True,
+                        truncation=True,
+                        return_tensors="tf",
+                    )
+                    with st.spinner(text="identifying theme"):
+                        tf_outputs = classification_model(tf_batch)
+                    classes = []
+                    with st.spinner(text="creating output file"):
+                        for x in stqdm(range(len(text))):
+                            # Per-review class probabilities from row x of the logits.
+                            tf_o = softmax(tf_outputs["logits"][x], axis=-1)
+                            label = np.argmax(tf_o, axis=0)
+                            keys = model_classes
+                            # .get() yields None for an index missing from model_classes.
+                            classes.append(keys.get(label))
+                        output["category"] = classes
                 csv = convert_df(output)
                 st.download_button(
                     label="Download data as CSV",
                     data=csv,
+                    file_name=f"{summarizer_option}_{date}_df.csv",
                     mime="text/csv",
                 )
             if summarizer_option == "t5-base":
                 output = pd.DataFrame(
                     {"text": df["text"].values.tolist(), "summary": summary}
                 )
+                # Optional step: add a "category" column predicted by the
+                # classification model when the checkbox is ticked.
+                if classification:
+                    classification_token, classification_model = classify_category()
+                    # Tokenize every review in one batch, padded/truncated to
+                    # at most 128 tokens, as TensorFlow tensors.
+                    tf_batch = classification_token(
+                        text,
+                        max_length=128,
+                        padding=True,
+                        truncation=True,
+                        return_tensors="tf",
+                    )
+                    with st.spinner(text="identifying theme"):
+                        tf_outputs = classification_model(tf_batch)
+                    classes = []
+                    with st.spinner(text="creating output file"):
+                        for x in stqdm(range(len(text))):
+                            # Use the `softmax` imported from tensorflow.nn; the
+                            # name `tf` is not imported by the visible imports.
+                            tf_o = softmax(tf_outputs["logits"][x], axis=-1)
+                            label = np.argmax(tf_o, axis=0)
+                            keys = model_classes
+                            # .get() yields None for an index missing from model_classes.
+                            classes.append(keys.get(label))
+                        output["category"] = classes
                 csv = convert_df(output)
                 st.download_button(
                     label="Download data as CSV",
                     data=csv,
+                    file_name=f"{summarizer_option}_{date}_df.csv",
                     mime="text/csv",
                 )
                 output = pd.DataFrame(
                     {"text": df["text"].values.tolist(), "summary": summary}
                 )
+                # Optional step: add a "category" column predicted by the
+                # classification model when the checkbox is ticked.
+                if classification:
+                    classification_token, classification_model = classify_category()
+                    # Tokenize every review in one batch, padded/truncated to
+                    # at most 128 tokens, as TensorFlow tensors.
+                    tf_batch = classification_token(
+                        text,
+                        max_length=128,
+                        padding=True,
+                        truncation=True,
+                        return_tensors="tf",
+                    )
+                    with st.spinner(text="identifying theme"):
+                        tf_outputs = classification_model(tf_batch)
+                    classes = []
+                    with st.spinner(text="creating output file"):
+                        for x in stqdm(range(len(text))):
+                            # Use the `softmax` imported from tensorflow.nn; the
+                            # name `tf` is not imported by the visible imports.
+                            tf_o = softmax(tf_outputs["logits"][x], axis=-1)
+                            label = np.argmax(tf_o, axis=0)
+                            keys = model_classes
+                            # .get() yields None for an index missing from model_classes.
+                            classes.append(keys.get(label))
+                        output["category"] = classes
                 csv = convert_df(output)
                 st.download_button(
                     label="Download data as CSV",
                     data=csv,
+                    file_name=f"{summarizer_option}_{date}_df.csv",
                     mime="text/csv",
                 )
         except KeyError:
             st.error(
                 "Please Make sure that your data must have a column named text",
                 icon="🚨",
             )
             st.info("Text column must have amazon reviews", icon="ℹ️")
+        except Exception:
+            # Catch Exception rather than BaseException so KeyboardInterrupt,
+            # SystemExit and other BaseException-only control-flow signals
+            # still propagate. logging.exception records the active traceback,
+            # so the exception does not need to be bound to a name.
+            logging.exception("An exception was occured")