devaldicaliesta committed
Commit f3ee207 · 1 Parent(s): dea95c0

first commit

Files changed (2)
  1. app.py +58 -0
  2. requirements.txt +14 -0
app.py ADDED
@@ -0,0 +1,58 @@
+ import streamlit as st
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.document_loaders import PyPDFLoader
+ from transformers import T5Tokenizer, T5ForConditionalGeneration
+ from transformers import pipeline
+ import torch
+ import base64
+ import time
+ from PIL import Image
+
+ # Model and tokenizer
+ model_checkpoint = "MBZUAI/LaMini-Flan-T5-783M"
+ model_tokenizer = T5Tokenizer.from_pretrained(model_checkpoint)
+ model = T5ForConditionalGeneration.from_pretrained(model_checkpoint)
+
+ # File loader and preprocessing
+ def preprocess_pdf(file):
+     loader = PyPDFLoader(file)
+     pages = loader.load_and_split()
+     text_splitter = RecursiveCharacterTextSplitter(chunk_size=170, chunk_overlap=70)
+     texts = text_splitter.split_documents(pages)
+     final_text = ""
+     for text in texts:
+         final_text = final_text + text.page_content
+     return final_text
+
+ @st.cache_data
+ def language_model_pipeline(filepath):
+     summarization_pipeline = pipeline(
+         'summarization',
+         model=model,
+         tokenizer=model_tokenizer,
+         max_length=500,
+         min_length=32
+     )
+     input_text = preprocess_pdf(filepath)
+     summary_result = summarization_pipeline(input_text)
+     summarized_text = summary_result[0]['summary_text']
+     return summarized_text
+
+ # User interface
+ title = st.title("PDF Summarization")
+ uploaded_file = st.file_uploader('Upload your PDF file', type=['pdf'])
+
+ if uploaded_file is not None:
+     st.success("File uploaded")
+
+     if st.button("Summarize"):
+         with st.spinner("Summarizing..."):
+             time.sleep(10)
+
+             filepath = uploaded_file.name
+             with open(filepath, "wb") as temp_file:
+                 temp_file.write(uploaded_file.read())
+
+             summarized_result = language_model_pipeline(filepath)
+             st.success("Summary:")
+             st.write(summarized_result)
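
The same loading, splitting, and summarization steps can also be exercised outside the Streamlit UI. Below is a minimal standalone sketch, assuming the same model checkpoint; "sample.pdf" is a hypothetical local input file, not something shipped with this commit.

from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from transformers import pipeline

# Load and flatten the PDF the same way preprocess_pdf does.
pages = PyPDFLoader("sample.pdf").load_and_split()  # "sample.pdf" is a placeholder path
chunks = RecursiveCharacterTextSplitter(chunk_size=170, chunk_overlap=70).split_documents(pages)
text = "".join(chunk.page_content for chunk in chunks)

# Let pipeline() resolve the tokenizer from the checkpoint name.
# As in the app, very long documents may exceed the model's input window.
summarizer = pipeline("summarization", model="MBZUAI/LaMini-Flan-T5-783M")
print(summarizer(text, max_length=500, min_length=32)[0]["summary_text"])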
requirements.txt ADDED
@@ -0,0 +1,14 @@
+ langchain
+ sentence_transformers
+ torch
+ sentencepiece
+ transformers
+ accelerate
+ chromadb
+ pypdf
+ tiktoken
+ streamlit
+ fastapi
+ uvicorn
+ python-multipart
+ aiofiles
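
To try the Space locally, the usual workflow is to install these dependencies with pip install -r requirements.txt and then launch the UI with streamlit run app.py.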