File size: 2,967 Bytes
f2c8e06
b14bb73
 
f2c8e06
 
b14bb73
 
f2c8e06
 
 
 
 
 
 
b14bb73
f2c8e06
 
 
 
 
 
 
b14bb73
f2c8e06
 
b14bb73
 
f2c8e06
 
b14bb73
 
 
 
f2c8e06
 
b14bb73
f2c8e06
 
 
b14bb73
 
 
 
f2c8e06
 
b14bb73
f2c8e06
 
 
b14bb73
f2c8e06
 
b14bb73
f2c8e06
 
 
 
 
 
 
 
 
b14bb73
f2c8e06
 
b14bb73
 
 
f2c8e06
 
b14bb73
f2c8e06
 
 
 
 
 
 
 
 
 
 
b14bb73
 
f2c8e06
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import os
import base64
from tempfile import NamedTemporaryFile
import streamlit as st
from transformers import pipeline
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Load model directly
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Hugging Face checkpoint shared by the tokenizer and the seq2seq model.
_CHECKPOINT = "MBZUAI/LaMini-Flan-T5-248M"


@st.cache_resource
def _load_model(checkpoint):
    """Load the tokenizer and seq2seq model for ``checkpoint`` once.

    Streamlit re-executes this script on every widget interaction; wrapping
    the load in ``st.cache_resource`` keeps a single shared copy instead of
    calling ``from_pretrained`` again on each rerun.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    return (
        AutoTokenizer.from_pretrained(checkpoint),
        AutoModelForSeq2SeqLM.from_pretrained(checkpoint),
    )


tokenizer, base_model = _load_model(_CHECKPOINT)

# File loader and processing
def file_preprocessing(file):
    """Return the text of the PDF at ``file`` as a single string.

    The PDF is loaded with ``PyPDFLoader``, re-split with a
    ``RecursiveCharacterTextSplitter`` (``chunk_size=300``,
    ``chunk_overlap=50``), and the resulting chunks' ``page_content`` values
    are concatenated in order with no separator between them.
    """
    documents = PyPDFLoader(file).load_and_split()
    chunks = RecursiveCharacterTextSplitter(
        chunk_size=300, chunk_overlap=50
    ).split_documents(documents)
    return "".join(chunk.page_content for chunk in chunks)

# LLM pipeline for summarization
def llm_pipeline(input_text):
    """Summarize ``input_text`` and return the summary string.

    A ``'summarization'`` pipeline is built around the module-level
    ``base_model`` and ``tokenizer`` with ``max_length=500`` and
    ``min_length=50``; the ``'summary_text'`` field of the first result
    is returned.
    """
    summarizer_options = {
        "model": base_model,
        "tokenizer": tokenizer,
        "max_length": 500,
        "min_length": 50,
    }
    summarizer = pipeline('summarization', **summarizer_options)
    outputs = summarizer(input_text)
    return outputs[0]['summary_text']

@st.cache_data
def displayPDF(file_path):
    """Display the PDF stored at ``file_path`` inside the Streamlit page.

    The file's bytes are base64-encoded into a ``data:application/pdf`` URI
    and rendered in an ``<iframe>`` through ``st.markdown`` with raw HTML
    enabled.
    """
    with open(file_path, "rb") as pdf_file:
        raw_bytes = pdf_file.read()
    base64_pdf = base64.b64encode(raw_bytes).decode('utf-8')
    pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="100%" height="600" type="application/pdf"></iframe>'
    st.markdown(pdf_display, unsafe_allow_html=True)

# Streamlit App
def main():
    """Render the summarizer page.

    The page has two independent sections:

    * PDF upload: once a PDF is uploaded and "Summarize PDF" is pressed, the
      file is shown in the left column and its summary in the right column.
    * Free text: pressing "Summarize Text" shows the entered text on the left
      and its summary on the right, or a warning when the text is blank.
    """
    st.title('Content Summarizer')

    # PDF Upload Section
    uploaded_file = st.file_uploader("Upload your PDF file", type=['pdf'])
    if uploaded_file is not None and st.button("Summarize PDF"):
        col1, col2 = st.columns(2)

        # The PDF helpers take a filesystem path, so spill the upload to a
        # temporary file. delete=False lets it be reopened by name after this
        # handle is closed; it is removed explicitly below.
        with NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            # getvalue() returns the whole upload regardless of the current
            # stream position, unlike read().
            temp_file.write(uploaded_file.getvalue())
            temp_filepath = temp_file.name

        try:
            with col1:
                st.info("Uploaded PDF File")
                displayPDF(temp_filepath)

            with col2:
                st.info("Summarization")
                input_text = file_preprocessing(temp_filepath)
                if input_text.strip():
                    summary = llm_pipeline(input_text)
                    st.success(summary)
                else:
                    # Nothing to summarize (e.g. no text could be extracted).
                    st.warning("No extractable text was found in this PDF.")
        finally:
            # Remove the temporary copy so uploads do not accumulate on disk.
            try:
                os.remove(temp_filepath)
            except FileNotFoundError:
                pass  # already gone; nothing left to clean up

    # Text Input Section
    st.header("Summarize Your Text")
    user_input = st.text_area("Enter your content here:", height=200)
    if st.button("Summarize Text"):
        if user_input.strip():
            col1, col2 = st.columns(2)

            with col1:
                st.info("Original Content")
                st.write(user_input)

            with col2:
                st.info("Summarization")
                summary = llm_pipeline(user_input)
                st.success(summary)
        else:
            st.warning("Please enter some content to summarize.")

# Build the page only when this file is executed as the main script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()