Spaces:

ms1449
/

Week3Day2Task

Sleeping

App Files Files Community

ms1449 committed on Jul 23, 2024

Commit

9a5450f

verified ·

1 Parent(s): 45acf72

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -25

app.py CHANGED Viewed

@@ -1,38 +1,63 @@
 import streamlit as st
-from transformers import pipeline, AutoTokenizer
-import torch
-# Hugging Face checkpoint used by the summarization pipeline
-model = "facebook/bart-large-cnn"
 @st.cache_resource
-def load_summarizer():
-    """Build the summarization pipeline for the `model` checkpoint.
-
-    The function is wrapped in `st.cache_resource`, so Streamlit is expected
-    to reuse the returned pipeline rather than rebuild it on each call.
-    """
-    summarizer = pipeline(task="summarization", model=model)
-    return summarizer
-def get_model_max_length(model_name):
-    """Return the `model_max_length` of the tokenizer loaded for `model_name`."""
-    return AutoTokenizer.from_pretrained(model_name).model_max_length
-def generate_summary(text):
-    """Summarize `text` with the cached summarization pipeline.
-
-    The input is first cut down to at most `max_input_length` tokens, then
-    handed to the pipeline, which generates between 40 and 150 new tokens
-    without sampling.
-
-    Args:
-        text: The raw text to summarize.
-
-    Returns:
-        The `summary_text` field of the pipeline's first result.
-    """
-    summarizer = load_summarizer()
-    tokenizer = summarizer.tokenizer
-    # Never exceed 1024 tokens, even if the tokenizer reports a larger limit.
-    max_input_length = min(tokenizer.model_max_length, 1024)
-    token_ids = tokenizer.encode(text, truncation=True, max_length=max_input_length)
-    # Skip special tokens when decoding: otherwise the literal "<s>"/"</s>"
-    # markers become part of the text, and the pipeline (which tokenizes the
-    # text again and adds its own markers) can overshoot the length limit.
-    truncated_text = tokenizer.decode(token_ids, skip_special_tokens=True)
-    # truncation=True is a safety net in case re-tokenizing the decoded text
-    # yields a few more tokens than the original encoding.
-    summary = summarizer(
-        truncated_text,
-        truncation=True,
-        max_new_tokens=150,
-        min_new_tokens=40,
-        do_sample=False,
-    )[0]['summary_text']
     return summary
-# Page header.
-st.title("A simple text-summarization-tool")
-st.write("Using the BART-large-CNN model.")
-# Free-form input; an empty box is rejected below with a warning.
-input_text = st.text_area("Enter the text:", height=200)
-generate_clicked = st.button("Generate Summary")
-if generate_clicked:
     if input_text:
         with st.spinner("Generating summary..."):
-            summary = generate_summary(text=input_text)
         st.subheader("Summary:")
         st.write(summary)
     else:
-        st.warning("Please enter text to summarize.")
-# Closing separator at the bottom of the page.
-st.write("---")

 import streamlit as st
+from transformers import BartTokenizer, BartForConditionalGeneration
 @st.cache_resource
+def load_model():
+    """Load the fine-tuned BART tokenizer and model from `bart_small_samsum`.
+
+    Wrapped in `st.cache_resource`, so Streamlit is expected to reuse the
+    loaded objects instead of reloading them each time this is called.
+
+    Returns:
+        A `(tokenizer, model)` tuple.
+    """
+    # Change this if the checkpoint lives somewhere else.
+    checkpoint = "bart_small_samsum"
+    return (
+        BartTokenizer.from_pretrained(checkpoint),
+        BartForConditionalGeneration.from_pretrained(checkpoint),
+    )
+# Token budgets: inputs longer than max_input_length are truncated by the
+# tokenizer, and generated summaries are capped at max_target_length.
+max_input_length = 128
+max_target_length = 64
+def summarize(input_text, tokenizer, model):
+    """Generate a summary of `input_text` with beam search.
+
+    Args:
+        input_text: The text (dialogue) to summarize.
+        tokenizer: Tokenizer used to encode the input and decode the output.
+        model: Model whose `generate` method produces the summary token ids.
+
+    Returns:
+        The decoded summary string with special tokens removed.
+    """
+    # Tokenize input text, truncating to the input budget
+    inputs = tokenizer(input_text, return_tensors="pt", max_length=max_input_length, truncation=True)
+    # Generate summary; the attention mask is forwarded explicitly so that
+    # generate() does not have to infer it from the input ids.
+    summary_ids = model.generate(
+        inputs["input_ids"],
+        attention_mask=inputs["attention_mask"],
+        max_length=max_target_length,
+        min_length=30,
+        length_penalty=2.0,
+        num_beams=4,
+        early_stopping=True
+    )
+    # Decode the first (only) generated sequence
+    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
     return summary
+# Streamlit app
+st.title("Summarization Tool Using Bart-small Finetuned on Small sized Samsum Dataset")
+# Load model
+tokenizer, model = load_model()
+# Text input
+input_text = st.text_area("Enter your dialogue here:", height=200)
+if st.button("Summarize"):
     if input_text:
         with st.spinner("Generating summary..."):
+            summary = summarize(input_text, tokenizer, model)
         st.subheader("Summary:")
         st.write(summary)
     else:
+        st.warning("Please enter some text to summarize.")
+# Add some information about the model
+st.sidebar.header("About")
+st.sidebar.info(
+    "This app uses a fine-tuned BART-Small model to summarize dialogues. "
+    "Enter your dialogue in the text area and click 'Summarize' to generate a summary."
+)
+# You can add more information or customization in the sidebar
+st.sidebar.header("Model Details")
+st.sidebar.text("Model: BART-small")
+# Show the limits from the constants so the sidebar cannot drift out of
+# sync with the values actually used for tokenization and generation.
+st.sidebar.text(f"Max Input Length: {max_input_length} tokens")
+st.sidebar.text(f"Max Summary Length: {max_target_length} tokens")