Spaces:
Sleeping
Sleeping
import streamlit as st | |
import PyPDF2 | |
from extractive_model import summarize_with_textrank | |
from nltk.tokenize import sent_tokenize | |
# Wide layout gives the three-column page built in main() the full browser width.
st.set_page_config(layout="wide")
# Function to handle file upload and return its content | |
def load_pdf(file):
    """Return the text of every page of a PDF as one string.

    Args:
        file: Whatever ``PyPDF2.PdfReader`` accepts; ``main()`` passes the
            object returned by the Streamlit file uploader.

    Returns:
        The page texts concatenated in page order with no separator added
        between pages. A page whose ``extract_text()`` result is falsy
        (``None`` or ``""``) contributes an empty string.
    """
    pdf_reader = PyPDF2.PdfReader(file)
    # Iterate the pages directly and join once, rather than indexing by
    # position and growing the string with ``+=`` for every page.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
# Function to calculate overlap | |
def calculate_overlap(original_text, summary_text):
    """Return the percentage of original sentences that reappear in the summary.

    Both texts are split with ``sent_tokenize`` and de-duplicated into sets; a
    sentence counts as shared only when the two strings are exactly equal.

    Args:
        original_text: The full source text.
        summary_text: The summary produced from it.

    Returns:
        ``shared / distinct_original_sentences * 100``, or ``0`` when the
        original text yields no sentences.
    """
    source_sentences = set(sent_tokenize(original_text))
    digest_sentences = set(sent_tokenize(summary_text))

    # Nothing to compare against: avoid dividing by zero.
    if not source_sentences:
        return 0

    shared = digest_sentences & source_sentences
    return (len(shared) / len(source_sentences)) * 100
# Main app | |
def main():
    """Render the summarizer page and handle one "Summarize" request.

    The page has three columns: the summarizer choice (left), the text/PDF
    input with the "Summarize" button (middle), and the stored summary with
    its overlap percentage (right). Results are kept in ``st.session_state``
    under the keys ``summary`` and ``overlap``.
    """
    st.title("Terms of Service Summarizer")

    # Layout: 3 columns
    col1, col2, col3 = st.columns([1, 3, 2], gap="large")

    # Left column: radio buttons for summarizer choice
    with col1:
        radio_options = ['Abstractive', 'Extractive']
        radio_selection = st.radio("Choose type of summarizer:", radio_options)

    # Middle column: text input and file uploader
    with col2:
        user_input = st.text_area("Enter your text here:")
        uploaded_file = st.file_uploader("Upload a PDF", type="pdf")

        if st.button("Summarize"):
            # NOTE: each early ``return`` below leaves main() entirely, so the
            # right-hand column is not rendered for that run.
            if uploaded_file and user_input:
                st.warning("Please provide either text input or a PDF file, not both.")
                return

            if uploaded_file:
                # Extract text from PDF
                file_content = load_pdf(uploaded_file)
                st.write("PDF uploaded successfully.")
            elif user_input:
                file_content = user_input
            else:
                st.warning("Please upload a PDF or enter some text to summarize.")
                return

            # An image-only PDF or whitespace-only input leaves nothing to
            # summarize; stop before handing empty text to the summarizer.
            if not file_content.strip():
                st.warning("No text could be extracted from the provided input.")
                return

            if radio_selection == "Extractive":
                summary = summarize_with_textrank(file_content)
                st.session_state.summary = summary
                # The overlap must be computed here, after both the input
                # text and its summary exist. Previously this ran before
                # ``file_content`` was assigned, so it never produced a value.
                # Assumes the summarizer returns a single string, since
                # calculate_overlap() sentence-tokenizes it -- TODO confirm.
                st.session_state.overlap = calculate_overlap(file_content, summary)
            elif radio_selection == "Abstractive":
                # No abstractive model is wired in; tell the user instead of
                # silently doing nothing.
                st.warning("Abstractive summarization is not available yet. Please choose 'Extractive'.")

    # Right column: results stored by the most recent successful run
    with col3:
        st.write("Summary:")
        if 'summary' in st.session_state:
            st.write(st.session_state.summary)
        # Display overlap percentage
        if 'overlap' in st.session_state:
            st.write(f"Overlap with Original Text: {st.session_state.overlap:.2f}%")
# Build the page only when this file is executed as the main script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()