mishrasahil934 committed on
Commit
f225694
·
verified ·
1 Parent(s): fe97d18

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +118 -116
app.py CHANGED
@@ -1,116 +1,118 @@
1
- from dotenv import load_dotenv
2
- load_dotenv()
3
-
4
- import streamlit as st
5
- from langchain.text_splitter import RecursiveCharacterTextSplitter
6
- from langchain.document_loaders import PyPDFLoader,DirectoryLoader
7
- from langchain.chains.summarize import load_summarize_chain
8
- from transformers import pipeline
9
- import torch
10
- import base64
11
-
12
-
13
- # Load model directly
14
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
15
-
16
- tokenizer = AutoTokenizer.from_pretrained("MBZUAI/LaMini-Flan-T5-248M")
17
- base_model = AutoModelForSeq2SeqLM.from_pretrained("MBZUAI/LaMini-Flan-T5-248M")
18
-
19
- #file loader and processing
20
- def file_preprocessing(file):
21
- loader = PyPDFLoader(file)
22
- pages = loader.load_and_split()
23
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
24
- texts = text_splitter.split_documents(pages)
25
- final_texts = ""
26
- for text in texts:
27
- print(text)
28
- final_texts = final_texts + text.page_content
29
- return final_texts
30
-
31
- #lm pipeline
32
- def llm_pipleline(filepath):
33
- pipe_sum = pipeline(
34
- 'summarization',
35
- model = base_model,
36
- tokenizer = tokenizer,
37
- max_length = 500,
38
- min_length = 50
39
- )
40
- input_text = file_preprocessing(filepath)
41
- result = pipe_sum(input_text)
42
- result = result[0]['summary_text']
43
- return result
44
- def llm_pipleline1(ans):
45
- pipe_sum = pipeline(
46
- 'summarization',
47
- model = base_model,
48
- tokenizer = tokenizer,
49
- max_length = 500,
50
- min_length = 50
51
- )
52
- input_text =""+ ans
53
- result = pipe_sum(input_text)
54
- result = result[0]['summary_text']
55
- return result
56
-
57
- @st.cache_data
58
- # Function to display the PDF file
59
- def displayPDF(file):
60
- # Opening file from file path
61
- with open(file, "rb") as f:
62
- base_pdf = base64.b64encode(f.read()).decode('utf-8') # Corrected function name and variable
63
-
64
- # Embedding PDF in HTML
65
- pdf_display = f'<iframe src="data:application/pdf;base64,{base_pdf}" width="100%" height="600" type="application/pdf"></iframe>'
66
-
67
- # Displaying the file
68
- st.markdown(pdf_display, unsafe_allow_html=True)
69
-
70
-
71
- #streamlit code
72
- st.set_page_config(layout='wide')
73
-
74
- def main():
75
-
76
- st.title('Content sumerizer')
77
-
78
- uploaded_file = st.file_uploader("upload your pdf file", type=['pdf'])
79
-
80
- if uploaded_file is not None:
81
- if st.button("summarize"):
82
- col1,col2 = st.columns(2)
83
- filepath = "data/"+uploaded_file.name
84
- with open(filepath, 'wb') as temp_file:
85
- temp_file.write(uploaded_file.read())
86
- with col1:
87
- st.info("uploaded PDF File")
88
- pdf_viewer = displayPDF(filepath)
89
-
90
-
91
- with col2:
92
- st.info("Summarization is below")
93
- summary = llm_pipleline(filepath)
94
- st.success(summary)
95
- else :
96
- print("enter a valid pdf file")
97
-
98
- if st.button("text"):
99
- ans = input("enter your content")
100
- if st.button("Enter"):
101
- col1,col2 = st.columns(2)
102
-
103
- with col1:
104
- st.info("what you have entered")
105
- print(ans)
106
- with col2:
107
- st.info("Summarization is below")
108
- summary1=llm_pipleline1(ans)
109
- st.success(summary1)
110
-
111
-
112
-
113
-
114
-
115
- if __name__ == '__main__':
116
- main() error is ameError: name 'base64_pdf' is not defined
 
 
 
1
+ from dotenv import load_dotenv
2
+ load_dotenv()
3
+ from tempfile import NamedTemporaryFile
4
+ import os
5
+ import streamlit as st
6
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
7
+ from langchain.document_loaders import PyPDFLoader,DirectoryLoader
8
+ from langchain.chains.summarize import load_summarize_chain
9
+ from transformers import pipeline
10
+ import torch
11
+ import base64
12
+
13
+
14
+ # Load model directly
15
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
16
+
17
# Both the tokenizer and the weights come from the same Hub checkpoint, so the
# identifier is spelled once and reused.
_CHECKPOINT = "MBZUAI/LaMini-Flan-T5-248M"

tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
base_model = AutoModelForSeq2SeqLM.from_pretrained(_CHECKPOINT)
19
+
20
+ #file loader and processing
21
def file_preprocessing(file):
    """Load a PDF and return its text as one string.

    The document is read with ``PyPDFLoader``, split with a
    ``RecursiveCharacterTextSplitter`` (``chunk_size=300``,
    ``chunk_overlap=50``) and the ``page_content`` of every resulting chunk is
    concatenated in order.

    Args:
        file: Path of the PDF file to read.

    Returns:
        str: The concatenated chunk texts; an empty string when the splitter
        produces no chunks.
    """
    loader = PyPDFLoader(file)
    pages = loader.load_and_split()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
    chunks = text_splitter.split_documents(pages)
    # NOTE(review): chunks are produced with a 50-character overlap and then
    # glued back together without a separator, so overlapping text presumably
    # appears twice in the result -- confirm whether that is intended.
    # A single join avoids both the per-chunk debug print and the quadratic
    # cost of growing a string with ``+`` inside a loop.
    return "".join(chunk.page_content for chunk in chunks)
31
+
32
+ #lm pipeline
33
def llm_pipleline(filepath):
    """Summarize the PDF stored at ``filepath``.

    Args:
        filepath: Path of the PDF file to summarize.

    Returns:
        str: The summary text generated for the PDF's extracted text.
    """
    # Extracting the text is the only PDF-specific step. The summarization
    # itself is delegated to the plain-text entry point so both paths share a
    # single definition of the pipeline settings instead of two copies.
    return llm_pipleline1(file_preprocessing(filepath))
45
def llm_pipleline1(ans):
    """Summarize a piece of plain text.

    A ``'summarization'`` pipeline is built from the module-level
    ``base_model`` and ``tokenizer`` with ``max_length=500`` and
    ``min_length=50``, and run on ``ans``.

    Args:
        ans: The text to summarize.

    Returns:
        str: The ``summary_text`` of the first pipeline result, or an empty
        string when ``ans`` is ``None``, empty or whitespace only.
    """
    # Nothing to summarize: skip the model call instead of feeding it blank
    # input.
    if not ans or not ans.strip():
        return ""

    pipe_sum = pipeline(
        'summarization',
        model=base_model,
        tokenizer=tokenizer,
        max_length=500,
        min_length=50,
    )
    # NOTE(review): the text is passed without truncation; very long inputs
    # may exceed what the model handles well -- confirm against the model card.
    result = pipe_sum(ans)
    return result[0]['summary_text']
57
+
58
def displayPDF(file):
    """Render the PDF at ``file`` inline in the Streamlit page.

    The file is read in binary mode, base64-encoded and embedded as a
    ``data:`` URI inside an ``<iframe>`` that is emitted with ``st.markdown``.

    Deliberately not wrapped in ``st.cache_data``: the function returns nothing
    and exists only for its rendering side effect, and a cache keyed on the
    path would hold every encoded PDF in memory and could show stale content
    if a path were ever reused for a different file.

    Args:
        file: Path of the PDF file to display.

    Returns:
        None
    """
    # Read the raw bytes and encode them so they can live inside a data URI.
    with open(file, "rb") as f:
        base_pdf = base64.b64encode(f.read()).decode('utf-8')

    # Embed the encoded PDF in an iframe.
    pdf_display = f'<iframe src="data:application/pdf;base64,{base_pdf}" width="100%" height="600" type="application/pdf"></iframe>'

    # unsafe_allow_html is required for Streamlit to emit the raw iframe tag.
    st.markdown(pdf_display, unsafe_allow_html=True)
70
+
71
+
72
# Streamlit page setup: use the full browser width for the two-column layout.
st.set_page_config(layout="wide")
74
+
75
def main():
    """Build the page: PDF summarization on top, free-text summarization below.

    The PDF section appears once a file is uploaded; pressing "Summarize"
    shows the PDF next to its summary. The text section summarizes whatever
    is typed into the text area when "Summarize Text" is pressed.
    """
    st.title('Content Summarizer')

    uploaded_file = st.file_uploader("Upload your PDF file", type=['pdf'])

    # The button is only rendered once a file has been uploaded.
    if uploaded_file is not None and st.button("Summarize"):
        col1, col2 = st.columns(2)

        # The loader and the viewer both need a real path, so the upload is
        # written to a temporary file. getvalue() returns the whole buffer
        # regardless of the current read position of the uploaded stream.
        with NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            temp_file.write(uploaded_file.getvalue())
            temp_filepath = temp_file.name

        try:
            with col1:
                st.info("Uploaded PDF File")
                displayPDF(temp_filepath)

            with col2:
                st.info("Summarization is below")
                summary = llm_pipleline(temp_filepath)
                st.success(summary)
        finally:
            # delete=False leaves the file on disk; remove it ourselves so
            # uploads do not pile up in the temp directory.
            os.remove(temp_filepath)

    # Section for summarizing text typed directly into the page.
    st.header("Summarize Your Text")
    user_input = st.text_area("Enter your content here:", height=200)

    if st.button("Summarize Text"):
        if user_input.strip():
            col1, col2 = st.columns(2)

            with col1:
                st.info("Original Content")
                st.write(user_input)

            with col2:
                st.info("Summarization is below")
                summary = llm_pipleline1(user_input)
                st.success(summary)
        else:
            st.warning("Please enter some content to summarize.")
116
+
117
# Script entry point: build the page when the file is executed directly.
if __name__ == "__main__":
    main()