mishrasahil934 committed on
Commit
fe97d18
·
verified ·
1 Parent(s): 9293e13
Files changed (1) hide show
  1. app.py +116 -116
app.py CHANGED
@@ -1,116 +1,116 @@
1
- from dotenv import load_dotenv
2
- load_dotenv()
3
-
4
- import streamlit as st
5
- from langchain.text_splitter import RecursiveCharacterTextSplitter
6
- from langchain.document_loaders import PyPDFLoader,DirectoryLoader
7
- from langchain.chains.summarize import load_summarize_chain
8
- from transformers import pipeline
9
- import torch
10
- import base64
11
-
12
-
13
- # Load model directly
14
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
15
-
16
# Hugging Face Hub id of the seq2seq checkpoint used for summarization.
MODEL_CHECKPOINT = "MBZUAI/LaMini-Flan-T5-248M"


@st.cache_resource(show_spinner=False)
def _load_tokenizer_and_model(checkpoint=MODEL_CHECKPOINT):
    """Load the tokenizer and seq2seq model for *checkpoint* once per process.

    Streamlit re-executes this script on every widget interaction; caching the
    loaded objects as a resource avoids loading them again on each rerun.
    ``show_spinner=False`` keeps this call from emitting a UI element before
    ``st.set_page_config`` runs further down the file.

    Args:
        checkpoint: Model id or local path accepted by ``from_pretrained``.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    return (
        AutoTokenizer.from_pretrained(checkpoint),
        AutoModelForSeq2SeqLM.from_pretrained(checkpoint),
    )


tokenizer, base_model = _load_tokenizer_and_model()
18
-
19
- #file loader and processing
20
def file_preprocessing(file):
    """Extract the text of a PDF as a single string.

    The PDF at *file* is loaded with ``PyPDFLoader``, split by a
    ``RecursiveCharacterTextSplitter`` (``chunk_size=300``,
    ``chunk_overlap=50``), and the chunk texts are concatenated in order with
    no separator.

    Args:
        file: Path of the PDF to read.

    Returns:
        The concatenated ``page_content`` of every chunk; an empty string when
        the splitter yields no chunks.
    """
    pages = PyPDFLoader(file).load_and_split()
    splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
    chunks = splitter.split_documents(pages)
    # NOTE(review): with chunk_overlap=50 adjacent chunks may share text, so the
    # joined string can repeat passages -- confirm this is intended.
    return "".join(chunk.page_content for chunk in chunks)
30
-
31
- #lm pipeline
32
def _summarize(text):
    """Summarize *text* with the module-level model and tokenizer.

    Builds a ``'summarization'`` pipeline around ``base_model`` / ``tokenizer``
    with ``max_length=500`` and ``min_length=50`` and returns the
    ``'summary_text'`` of the first result.

    Args:
        text: The text to summarize.

    Returns:
        The generated summary, or ``""`` when *text* is empty or only
        whitespace (there is nothing to summarize, so the model is not run).
    """
    if not text or (isinstance(text, str) and not text.strip()):
        return ""
    pipe_sum = pipeline(
        'summarization',
        model=base_model,
        tokenizer=tokenizer,
        max_length=500,
        min_length=50,
    )
    result = pipe_sum(text)
    return result[0]['summary_text']


def llm_pipleline(filepath):
    """Summarize the PDF stored at *filepath*.

    Args:
        filepath: Path of the PDF; its text is extracted with
            ``file_preprocessing``.

    Returns:
        The summary string produced for the extracted text.
    """
    return _summarize(file_preprocessing(filepath))


def llm_pipleline1(ans):
    """Summarize free text entered by the user.

    Args:
        ans: The text to summarize.

    Returns:
        The summary string produced for *ans*.
    """
    return _summarize(ans)
56
-
57
def _pdf_iframe_html(file):
    """Return an ``<iframe>`` tag embedding the PDF at *file* as a base64 data URI."""
    with open(file, "rb") as f:
        base_pdf = base64.b64encode(f.read()).decode('utf-8')
    return f'<iframe src="data:application/pdf;base64,{base_pdf}" width="100%" height="600" type="application/pdf"></iframe>'


def displayPDF(file):
    """Render the PDF stored at *file* inline in the Streamlit page.

    The function is intentionally not wrapped in ``st.cache_data``: the cache
    would be keyed on the path alone, so uploading a different document under
    the same file name would replay the previously rendered PDF.

    Args:
        file: Path of the PDF to display.

    Returns:
        None. The PDF is shown through ``st.markdown``.
    """
    st.markdown(_pdf_iframe_html(file), unsafe_allow_html=True)
69
-
70
-
71
- #streamlit code
72
st.set_page_config(layout='wide')


def _save_upload(uploaded_file):
    """Write the uploaded file into the working directory and return its path."""
    import os

    # Keep only the final path component of the uploaded file's name so the
    # write cannot land outside the working directory.
    filepath = os.path.basename(uploaded_file.name)
    with open(filepath, 'wb') as temp_file:
        # getvalue() returns the whole buffer regardless of the read position.
        temp_file.write(uploaded_file.getvalue())
    return filepath


def main():
    """Build the Streamlit page: summarize an uploaded PDF or pasted text.

    PDF flow: once a file is uploaded and "summarize" is pressed, the file is
    saved to disk, shown in the left column, and its summary is shown in the
    right column.

    Text flow: the text typed into the text area is summarized when "Enter" is
    pressed. The input is a Streamlit widget rather than ``input()`` (which
    would block on the server's stdin), and a single button is used because a
    button nested inside another button's branch is never reached: the click
    triggers a rerun in which the outer button is no longer pressed.
    """
    st.title('Content sumerizer')

    uploaded_file = st.file_uploader("upload your pdf file", type=['pdf'])
    if uploaded_file is not None and st.button("summarize"):
        filepath = _save_upload(uploaded_file)
        col1, col2 = st.columns(2)
        with col1:
            st.info("uploaded PDF File")
            displayPDF(filepath)
        with col2:
            st.info("Summarization is below")
            st.success(llm_pipleline(filepath))

    ans = st.text_area("enter your content")
    if st.button("Enter"):
        if ans.strip():
            col1, col2 = st.columns(2)
            with col1:
                st.info("what you have entered")
                st.write(ans)
            with col2:
                st.info("Summarization is below")
                st.success(llm_pipleline1(ans))
        else:
            # Feedback goes to the page; a console print is invisible to the user.
            st.warning("enter your content")
110
-
111
-
112
-
113
-
114
-
115
- if __name__ == '__main__':
116
- main()
 
1
+ from dotenv import load_dotenv
2
+ load_dotenv()
3
+
4
+ import streamlit as st
5
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
6
+ from langchain.document_loaders import PyPDFLoader,DirectoryLoader
7
+ from langchain.chains.summarize import load_summarize_chain
8
+ from transformers import pipeline
9
+ import torch
10
+ import base64
11
+
12
+
13
+ # Load model directly
14
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
15
+
16
# Hugging Face Hub id of the seq2seq checkpoint used for summarization.
MODEL_CHECKPOINT = "MBZUAI/LaMini-Flan-T5-248M"


@st.cache_resource(show_spinner=False)
def _load_tokenizer_and_model(checkpoint=MODEL_CHECKPOINT):
    """Load the tokenizer and seq2seq model for *checkpoint* once per process.

    Streamlit re-executes this script on every widget interaction; caching the
    loaded objects as a resource avoids loading them again on each rerun.
    ``show_spinner=False`` keeps this call from emitting a UI element before
    ``st.set_page_config`` runs further down the file.

    Args:
        checkpoint: Model id or local path accepted by ``from_pretrained``.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    return (
        AutoTokenizer.from_pretrained(checkpoint),
        AutoModelForSeq2SeqLM.from_pretrained(checkpoint),
    )


tokenizer, base_model = _load_tokenizer_and_model()
18
+
19
+ #file loader and processing
20
def file_preprocessing(file):
    """Extract the text of a PDF as a single string.

    The PDF at *file* is loaded with ``PyPDFLoader``, split by a
    ``RecursiveCharacterTextSplitter`` (``chunk_size=300``,
    ``chunk_overlap=50``), and the chunk texts are concatenated in order with
    no separator.

    Args:
        file: Path of the PDF to read.

    Returns:
        The concatenated ``page_content`` of every chunk; an empty string when
        the splitter yields no chunks.
    """
    pages = PyPDFLoader(file).load_and_split()
    splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
    chunks = splitter.split_documents(pages)
    # NOTE(review): with chunk_overlap=50 adjacent chunks may share text, so the
    # joined string can repeat passages -- confirm this is intended.
    return "".join(chunk.page_content for chunk in chunks)
30
+
31
+ #lm pipeline
32
def _summarize(text):
    """Summarize *text* with the module-level model and tokenizer.

    Builds a ``'summarization'`` pipeline around ``base_model`` / ``tokenizer``
    with ``max_length=500`` and ``min_length=50`` and returns the
    ``'summary_text'`` of the first result.

    Args:
        text: The text to summarize.

    Returns:
        The generated summary, or ``""`` when *text* is empty or only
        whitespace (there is nothing to summarize, so the model is not run).
    """
    if not text or (isinstance(text, str) and not text.strip()):
        return ""
    pipe_sum = pipeline(
        'summarization',
        model=base_model,
        tokenizer=tokenizer,
        max_length=500,
        min_length=50,
    )
    result = pipe_sum(text)
    return result[0]['summary_text']


def llm_pipleline(filepath):
    """Summarize the PDF stored at *filepath*.

    Args:
        filepath: Path of the PDF; its text is extracted with
            ``file_preprocessing``.

    Returns:
        The summary string produced for the extracted text.
    """
    return _summarize(file_preprocessing(filepath))


def llm_pipleline1(ans):
    """Summarize free text entered by the user.

    Args:
        ans: The text to summarize.

    Returns:
        The summary string produced for *ans*.
    """
    return _summarize(ans)
56
+
57
def _pdf_iframe_html(file):
    """Return an ``<iframe>`` tag embedding the PDF at *file* as a base64 data URI."""
    with open(file, "rb") as f:
        base_pdf = base64.b64encode(f.read()).decode('utf-8')
    return f'<iframe src="data:application/pdf;base64,{base_pdf}" width="100%" height="600" type="application/pdf"></iframe>'


def displayPDF(file):
    """Render the PDF stored at *file* inline in the Streamlit page.

    The function is intentionally not wrapped in ``st.cache_data``: the cache
    would be keyed on the path alone, so uploading a different document under
    the same file name would replay the previously rendered PDF.

    Args:
        file: Path of the PDF to display.

    Returns:
        None. The PDF is shown through ``st.markdown``.
    """
    st.markdown(_pdf_iframe_html(file), unsafe_allow_html=True)
69
+
70
+
71
+ #streamlit code
72
st.set_page_config(layout='wide')


def _save_upload(uploaded_file, directory="data"):
    """Write the uploaded file under *directory* and return the saved path."""
    import os

    # The target directory is not guaranteed to exist on a fresh checkout.
    os.makedirs(directory, exist_ok=True)
    # Keep only the final path component of the uploaded file's name so the
    # write cannot land outside *directory*.
    filepath = os.path.join(directory, os.path.basename(uploaded_file.name))
    with open(filepath, 'wb') as temp_file:
        # getvalue() returns the whole buffer regardless of the read position.
        temp_file.write(uploaded_file.getvalue())
    return filepath


def main():
    """Build the Streamlit page: summarize an uploaded PDF or pasted text.

    PDF flow: once a file is uploaded and "summarize" is pressed, the file is
    saved under ``data/``, shown in the left column, and its summary is shown
    in the right column.

    Text flow: the text typed into the text area is summarized when "Enter" is
    pressed. The input is a Streamlit widget rather than ``input()`` (which
    would block on the server's stdin), and a single button is used because a
    button nested inside another button's branch is never reached: the click
    triggers a rerun in which the outer button is no longer pressed.
    """
    st.title('Content sumerizer')

    uploaded_file = st.file_uploader("upload your pdf file", type=['pdf'])
    if uploaded_file is not None and st.button("summarize"):
        filepath = _save_upload(uploaded_file)
        col1, col2 = st.columns(2)
        with col1:
            st.info("uploaded PDF File")
            displayPDF(filepath)
        with col2:
            st.info("Summarization is below")
            st.success(llm_pipleline(filepath))

    ans = st.text_area("enter your content")
    if st.button("Enter"):
        if ans.strip():
            col1, col2 = st.columns(2)
            with col1:
                st.info("what you have entered")
                st.write(ans)
            with col2:
                st.info("Summarization is below")
                st.success(llm_pipleline1(ans))
        else:
            # Feedback goes to the page; a console print is invisible to the user.
            st.warning("enter your content")
110
+
111
+
112
+
113
+
114
+
115
+ if __name__ == '__main__':
116
+ main()
Reported error: NameError: name 'base64_pdf' is not defined