File size: 3,667 Bytes
c0b040c
 
9f71231
 
c0b040c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ec9702c
c0b040c
ec9702c
c0b040c
ec9702c
c0b040c
ec9702c
 
 
 
 
c0b040c
 
 
 
9f71231
 
 
 
 
 
f4d2744
c0b040c
f4d2744
 
 
 
c0b040c
f4d2744
 
 
9f71231
 
c0b040c
9f71231
f4d2744
c0b040c
f4d2744
9f71231
f4d2744
c0b040c
 
9f71231
c0b040c
f4d2744
 
 
c0b040c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
from dotenv import load_dotenv
load_dotenv()
import os
from tempfile import NamedTemporaryFile
import streamlit as st
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader,DirectoryLoader
from langchain.chains.summarize import load_summarize_chain
from transformers import pipeline
import torch
import base64


# Load model directly
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# The tokenizer and the seq2seq model are loaded from the same pretrained
# checkpoint identifier, so it is spelled out only once.
_MODEL_CHECKPOINT = "MBZUAI/LaMini-Flan-T5-248M"
tokenizer = AutoTokenizer.from_pretrained(_MODEL_CHECKPOINT)
base_model = AutoModelForSeq2SeqLM.from_pretrained(_MODEL_CHECKPOINT)

#file loader and processing 
def file_preprocessing(file):
    """Extract the text of a PDF file as a single string.

    The PDF is loaded with ``PyPDFLoader``, re-split with a
    ``RecursiveCharacterTextSplitter`` (``chunk_size=300``,
    ``chunk_overlap=50``) and the ``page_content`` of every resulting chunk
    is concatenated in order, without any separator.

    Args:
        file: Path of the PDF file, passed straight to ``PyPDFLoader``.

    Returns:
        str: The concatenated chunk contents; ``""`` when the splitter
        yields no chunks.
    """
    loader = PyPDFLoader(file)
    pages = loader.load_and_split()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
    chunks = text_splitter.split_documents(pages)
    # NOTE(review): with chunk_overlap=50 neighbouring chunks may share text,
    # so the joined string can repeat fragments at chunk boundaries. Left
    # as-is to keep the returned text unchanged; confirm whether the overlap
    # is actually wanted here.
    return "".join(chunk.page_content for chunk in chunks)

#lm pipeline
def _build_summarizer():
    """Create the summarization pipeline shared by both public entry points."""
    return pipeline(
        'summarization',
        model=base_model,
        tokenizer=tokenizer,
        max_length=500,
        min_length=50,
    )


def _summarize(text):
    """Return the generated summary of *text*, or ``""`` for blank input."""
    # Nothing to summarize: skip the model instead of letting it generate
    # at least ``min_length`` tokens out of an empty prompt.
    if not text or not text.strip():
        return ""
    result = _build_summarizer()(text)
    return result[0]['summary_text']


def llm_pipleline(filepath):
    """Summarize the PDF stored at *filepath*.

    Args:
        filepath: Path of the PDF file; its text is extracted with
            ``file_preprocessing``.

    Returns:
        str: The ``summary_text`` produced by the summarization pipeline,
        or ``""`` when no text could be extracted.
    """
    return _summarize(file_preprocessing(filepath))


def llm_pipleline1(ans):
    """Summarize the raw text *ans*.

    Args:
        ans: The text to summarize.

    Returns:
        str: The ``summary_text`` produced by the summarization pipeline,
        or ``""`` when *ans* is empty or only whitespace.
    """
    return _summarize(ans)

@st.cache_data
def displayPDF(file):
    """Render the PDF stored at path *file* inside the Streamlit page.

    The file is read in binary mode, base64-encoded and embedded as a
    ``data:`` URI in an ``<iframe>`` that is emitted through ``st.markdown``.

    Args:
        file: Path of the PDF file to display.
    """
    with open(file, "rb") as pdf_file:
        raw_bytes = pdf_file.read()
    base_pdf = base64.b64encode(raw_bytes).decode('utf-8')

    # The markup is raw HTML, hence unsafe_allow_html=True.
    st.markdown(
        f'<iframe src="data:application/pdf;base64,{base_pdf}" width="100%" height="600" type="application/pdf"></iframe>',
        unsafe_allow_html=True,
    )

# Streamlit page setup: render the app with the 'wide' page layout.
st.set_page_config(layout='wide')



def _summarize_uploaded_pdf(uploaded_file):
    """Show the uploaded PDF in one column and its summary in the other."""
    # Both the viewer and the summarizer take a file path, so the upload is
    # written to a temporary file first. getvalue() returns the whole
    # content regardless of the current read position of the upload.
    with NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
        temp_file.write(uploaded_file.getvalue())
        temp_filepath = temp_file.name

    try:
        col1, col2 = st.columns(2)

        with col1:
            st.info("Uploaded PDF File")
            displayPDF(temp_filepath)

        with col2:
            st.info("Summarization is below")
            st.success(llm_pipleline(temp_filepath))
    finally:
        # delete=False leaves the file on disk; remove it once it has been
        # rendered and summarized so temp files do not pile up.
        os.remove(temp_filepath)


def _summarize_typed_text():
    """Show a text box and, on "Enter", the typed text next to its summary."""
    ans = st.text_area("enter your content")
    if not st.button("Enter"):
        return
    if not ans.strip():
        st.warning("Please enter some text to summarize.")
        return

    col1, col2 = st.columns(2)

    with col1:
        st.info("what you have entered")
        st.write(ans)

    with col2:
        st.info("Summarization is below")
        st.success(llm_pipleline1(ans))


def main():
    """Run the Streamlit UI of the summarizer.

    Offers two inputs: an uploaded PDF, summarized when "Summarize" is
    clicked, and free text, revealed by the "text" button and summarized
    when "Enter" is clicked.
    """
    st.title('Content Summarizer')

    uploaded_file = st.file_uploader("Upload your PDF file", type=['pdf'])

    if uploaded_file is None:
        st.warning("Please upload a valid PDF file.")
    elif st.button("Summarize"):
        _summarize_uploaded_pdf(uploaded_file)

    # A button is only True during the rerun triggered by its own click, so
    # the "text" click is remembered in session state; otherwise clicking
    # "Enter" would rerun the script with "text" False and hide the form.
    if st.button("text"):
        st.session_state["show_text_input"] = True
    if st.session_state.get("show_text_input", False):
        _summarize_typed_text()



                    
            
# Start the app only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()