Spaces:

Mia2024
/

CS5100TextSummarization

Sleeping

App Files Files Community

nan-motherboard committed on Dec 5, 2024

Commit

fa77629

1 Parent(s): d38c13b

final

Browse files

Files changed (3) hide show

__pycache__/utils.cpython-311.pyc +0 -0
app.py +50 -2
utils.py +57 -0

__pycache__/utils.cpython-311.pyc ADDED Viewed

Binary file (3.96 kB). View file

app.py CHANGED Viewed

@@ -1,4 +1,52 @@
 import streamlit as st
-x = st.slider('Select a value')
-st.write(x, 'squared is', x * x)

 import streamlit as st
+from utils import generate_summary
+# Initialize session state variables
+if "clicked" not in st.session_state:
+    st.session_state.clicked = False
+if "input_text" not in st.session_state:
+    st.session_state.input_text = ""
+if "generated_summary" not in st.session_state:
+    st.session_state.generated_summary = ""
+st.title("Dialogue Text Summarization")
+st.write("---")
+height = 200
+# Text area with session state
+input_text = st.text_area("Dialogue", height=height, key="input_text")
+# Submit button logic
+if st.button("Submit"):
+    if st.session_state.input_text.strip() == "":
+        st.error("Please enter a dialogue!")
+    else:
+        st.write("---")
+        st.write("## Summary")
+        st_container = st.empty()
+        st_info_container = st.empty()
+        # Generate summary and store it in session state
+        st.session_state.generated_summary = generate_summary(
+            " ".join(st.session_state.input_text.split()),
+            st_container,
+            st_info_container
+        )
+# Display the generated summary
+if st.session_state.generated_summary:
+    st.write(st.session_state.generated_summary)
+# Clear button logic
+def clear_all():
+    st.session_state.clicked = True
+    st.session_state.input_text = ""  # Clear input text
+    st.session_state.generated_summary = ""  # Clear summary
+st.button("Clear", on_click=clear_all)
+# Logic for clearing display
+if st.session_state.clicked:
+    st.session_state.clicked = False
+    st.experimental_rerun()

utils.py ADDED Viewed

	@@ -0,0 +1,57 @@

+import torch
+from transformers import AutoTokenizer, GenerationConfig, TextStreamer, AutoModelForSeq2SeqLM
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+import time
+checkpoint = "Mia2024/CS5100TextSummarization"
+checkpoint = "facebook/bart-large-cnn"
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+class StreamlitTextStreamer(TextStreamer):
+    def __init__(self, tokenizer, st_container, st_info_container, skip_prompt=False, **decode_kwargs):
+        super().__init__(tokenizer, skip_prompt, **decode_kwargs)
+        self.st_container = st_container
+        self.st_info_container = st_info_container
+        self.text = ""
+        self.start_time = None
+        self.first_token_time = None
+        self.total_tokens = 0
+    def on_finalized_text(self, text: str, stream_end: bool=False):
+        if self.start_time is None:
+            self.start_time = time.time()
+        if self.first_token_time is None and len(text.strip()) > 0:
+            self.first_token_time = time.time()
+        self.text += text
+        self.total_tokens += len(text.split())
+        self.st_container.markdown("###### " + self.text)
+        time.sleep(0.03)
+def generate_summary(input_text, st_container, st_info_container) -> str:
+    generation_config = GenerationConfig(
+            min_new_tokens=10,
+            max_new_tokens=256,
+            temperature=0.9,
+            top_p=1.0,
+            top_k=50
+        )
+    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+    model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint).to(device)
+    prefix = "Summarize the following conversation: \n###\n"
+    suffix = "\n### Summary:"
+    target_length = max(1, int(0.15 * len(input_text.split())))
+    input_ids = tokenizer.encode(prefix + input_text + f"The generated summary should be around {target_length} words." + suffix, return_tensors="pt")
+    # Initialize the Streamlit container and streamer
+    streamer = StreamlitTextStreamer(tokenizer, st_container, st_info_container, skip_special_tokens=True, decoder_start_token_id=3)
+    model.generate(input_ids, streamer=streamer, do_sample=True, generation_config=generation_config)