Spaces:
Runtime error
Runtime error
File size: 2,838 Bytes
63337f5 7d0a6ff 11ef280 63337f5 6f9cc9b 7de3632 078b2c2 7de3632 078b2c2 6f9cc9b 63337f5 6f9cc9b 7de3632 63337f5 7de3632 d4376fd 63337f5 d4376fd 63337f5 d4376fd 63337f5 e89f971 db38720 63337f5 db38720 e89f971 63337f5 db38720 63337f5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
import base64
import os
import tempfile

import streamlit as st
import torch
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from transformers import T5Tokenizer, T5ForConditionalGeneration
from transformers import pipeline
#import os
#from dotenv import load_dotenv
#from huggingface_hub import HfApi
#api = HfApi()
#token = api.retrieve_token("secret_token") # Replace with your secret name
#load_dotenv()
#token = os.environ.get("HF_TOKEN")
# Hugging Face model id shared by the tokenizer and the model below.
checkpoint = "MBZUAI/LaMini-Flan-T5-248M"

# Tokenizer and model are loaded at module import time and reused by
# llm_pipeline().
tokenizer = T5Tokenizer.from_pretrained(checkpoint)
base_model = T5ForConditionalGeneration.from_pretrained(
    checkpoint,
    torch_dtype=torch.float32,
    device_map='auto',
)
#file loader and preprocessing
def file_preprocessing(file, chunk_size=200, chunk_overlap=50):
    """Load a PDF and return the text of all its chunks as one string.

    The PDF is read with ``PyPDFLoader(file).load_and_split()``, the
    resulting documents are re-split with ``RecursiveCharacterTextSplitter``
    and the ``page_content`` of every chunk is concatenated with no
    separator.

    Args:
        file: Path of the PDF file to load.
        chunk_size: ``chunk_size`` passed to the text splitter.
        chunk_overlap: ``chunk_overlap`` passed to the text splitter.

    Returns:
        The concatenated chunk text; an empty string when the splitter
        yields no chunks.
    """
    loader = PyPDFLoader(file)
    pages = loader.load_and_split()
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    texts = text_splitter.split_documents(pages)
    # NOTE(review): with a non-zero chunk_overlap, neighbouring chunks may
    # share text, so the joined string can repeat those regions -- confirm
    # this is intended for the summarizer input.
    # join() builds the result in one pass instead of repeated `+` copies.
    return "".join(text.page_content for text in texts)
#LLM pipeline
def llm_pipeline(filepath):
    """Summarize the PDF at ``filepath`` and return the summary string.

    A ``summarization`` pipeline is built around the module-level
    ``base_model`` and ``tokenizer`` (max_length=500, min_length=50), fed
    the text produced by ``file_preprocessing(filepath)``, and the
    ``summary_text`` field of its first result is returned.
    """
    generation_options = {
        'model': base_model,
        'tokenizer': tokenizer,
        'max_length': 500,
        'min_length': 50,
    }
    # Build the pipeline first, then read and preprocess the document.
    summarizer = pipeline('summarization', **generation_options)
    document_text = file_preprocessing(filepath)
    outputs = summarizer(document_text)
    return outputs[0]['summary_text']
# NOTE: the triple-quoted string below is a bare string expression, so
# nothing inside it is executed. It holds a disabled helper (displayPDF)
# that would base64-encode a PDF file and embed it in an <iframe> via
# st.markdown.
"""
@st.cache_data
#function to display the PDF of a given file
def displayPDF(file):
# Opening file from file path
with open(file, "rb") as f:
base64_pdf = base64.b64encode(f.read()).decode('utf-8')
# Embedding PDF in HTML
pdf_display = F'<iframe src="data:application/pdf;base64,{base64_pdf}" width="100%" height="600" type="application/pdf"></iframe>'
# Displaying File
st.markdown(pdf_display, unsafe_allow_html=True)
"""
#streamlit code
# Configure the page with the "wide" layout before main() renders any UI.
st.set_page_config(layout="wide")
def main():
    """Render the app: upload a PDF, summarize it on request, show the result.

    Nothing happens until a file has been uploaded and the "Summarize"
    button is pressed. The upload is then written to a temporary file
    (the summarization pipeline takes a file path), summarized with
    ``llm_pipeline`` and displayed. The temporary file is always removed,
    even when summarization raises.
    """
    st.title("Document Summarization App using Language Model")
    uploaded_file = st.file_uploader("Upload your PDF file", type=['pdf'])
    if uploaded_file is None:
        return
    if not st.button("Summarize"):
        return

    # st.columns() returns a list of containers even for a single column;
    # unpack it so that `with col2:` receives a container, not a list.
    (col2,) = st.columns(1)

    # delete=False keeps the file on disk after the handle is closed so it
    # can be reopened by path; it is removed explicitly below.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
        # getvalue() returns the full upload regardless of stream position.
        temp_file.write(uploaded_file.getvalue())
        filepath = temp_file.name

    # The handle is closed here, so the contents are flushed and the file
    # can be opened again by path.
    try:
        with col2:
            summary = llm_pipeline(filepath)
            st.info("Summarization Complete")
            st.success(summary)
    finally:
        # Clean up the temporary file whether or not summarization succeeded.
        os.remove(filepath)
# Run the app when this file is executed as a script.
if __name__ == "__main__":
    main()