Spaces:
Runtime error
Runtime error
File size: 2,324 Bytes
63337f5 7d0a6ff 11ef280 63337f5 6f9cc9b 7de3632 078b2c2 7de3632 078b2c2 6f9cc9b 63337f5 6f9cc9b 7de3632 63337f5 7de3632 d4376fd 63337f5 30bc38f 63337f5 9018ff8 63337f5 e89f971 db38720 63337f5 db38720 e89f971 63337f5 db38720 63337f5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
import base64
import os
import tempfile

import streamlit as st
import torch
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from transformers import T5Tokenizer, T5ForConditionalGeneration
from transformers import pipeline
#import os
#from dotenv import load_dotenv
#from huggingface_hub import HfApi
#api = HfApi()
#token = api.retrieve_token("secret_token") # Replace with your secret name
#load_dotenv()
#token = os.environ.get("HF_TOKEN")
# Model identifier handed to both from_pretrained() calls below.
checkpoint = "MBZUAI/LaMini-Flan-T5-248M"
# Tokenizer and model are loaded once at module level; llm_pipeline() reads
# them as globals when it builds the summarization pipeline.
tokenizer = T5Tokenizer.from_pretrained(checkpoint)
# Weights are requested in float32. NOTE(review): device_map='auto' presumably
# leaves device placement to the library -- confirm the extra dependency it
# needs is installed in the deployment environment.
base_model = T5ForConditionalGeneration.from_pretrained(checkpoint, device_map='auto', torch_dtype=torch.float32)
#file loader and preprocessing
def file_preprocessing(file, *, chunk_size=200, chunk_overlap=50):
    """Load a PDF and return its text as a single string.

    The document is loaded with ``PyPDFLoader``, re-split with a
    ``RecursiveCharacterTextSplitter`` and the ``page_content`` of every
    resulting chunk is concatenated in order, with no separator.

    Args:
        file: Path of the PDF, passed straight to ``PyPDFLoader``.
        chunk_size: ``chunk_size`` given to the text splitter.
        chunk_overlap: ``chunk_overlap`` given to the text splitter.

    Returns:
        The concatenated chunk text; an empty string when the splitter
        yields no chunks.
    """
    loader = PyPDFLoader(file)
    pages = loader.load_and_split()
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    texts = text_splitter.split_documents(pages)
    # NOTE(review): with a non-zero overlap neighbouring chunks presumably
    # share text, so the joined string may repeat passages -- confirm that
    # this is what the summarizer should receive.
    # str.join builds the result in one pass instead of repeated `+=`.
    return "".join(doc.page_content for doc in texts)
#LLM pipeline
def llm_pipeline(filepath):
    """Summarize the PDF stored at *filepath*.

    A ``'summarization'`` pipeline is built from the module-level
    ``base_model`` and ``tokenizer``, the text returned by
    ``file_preprocessing`` is run through it, and the ``'summary_text'``
    of the first result is returned.
    """
    summarizer_options = {
        "model": base_model,
        "tokenizer": tokenizer,
        "max_length": 500,
        "min_length": 50,
    }
    summarizer = pipeline('summarization', **summarizer_options)
    document_text = file_preprocessing(filepath)
    outputs = summarizer(document_text)
    return outputs[0]['summary_text']
def main():
    """Render the Streamlit page: upload a PDF and display its summary.

    The uploaded bytes are written to a temporary file because
    ``llm_pipeline`` takes a file path. That file is removed once
    summarization has finished, whether it succeeded or raised.
    """
    st.title("Document Summarization App using Language Model")
    uploaded_file = st.file_uploader("Upload your PDF file", type=['pdf'])
    if uploaded_file is None:
        return
    if not st.button("Summarize"):
        return

    # st.columns() returns a list of containers even for a single column,
    # so unpack it to get the object that can be used in a `with` statement.
    (col2,) = st.columns(1)

    # delete=False keeps the file on disk after the handle is closed, so it
    # can be reopened by path; closing the handle also flushes the contents.
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
        temp_file.write(uploaded_file.read())
        filepath = temp_file.name

    try:
        with col2:
            summary = llm_pipeline(filepath)
            st.info("Summarization Complete")
            st.success(summary)
    finally:
        # Clean up the temporary file even when summarization fails.
        os.remove(filepath)
# Run the app only when this file is executed as a script.
if __name__ == "__main__":
    main()