Spaces:
Sleeping
Sleeping
File size: 2,967 Bytes
f2c8e06 b14bb73 f2c8e06 b14bb73 f2c8e06 b14bb73 f2c8e06 b14bb73 f2c8e06 b14bb73 f2c8e06 b14bb73 f2c8e06 b14bb73 f2c8e06 b14bb73 f2c8e06 b14bb73 f2c8e06 b14bb73 f2c8e06 b14bb73 f2c8e06 b14bb73 f2c8e06 b14bb73 f2c8e06 b14bb73 f2c8e06 b14bb73 f2c8e06 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
import os
import base64
from tempfile import NamedTemporaryFile
import streamlit as st
from transformers import pipeline
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Load model directly
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Hugging Face Hub checkpoint shared by the tokenizer and the seq2seq model.
CHECKPOINT = "MBZUAI/LaMini-Flan-T5-248M"


@st.cache_resource
def _load_model(checkpoint):
    """Load the tokenizer and seq2seq model for *checkpoint* once per process.

    Streamlit re-executes this script from the top on every user interaction,
    so an uncached ``from_pretrained`` call would reload the weights on each
    rerun. ``st.cache_resource`` hands back the same objects instead.

    Args:
        checkpoint: Model identifier passed to both ``from_pretrained`` calls.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    return (
        AutoTokenizer.from_pretrained(checkpoint),
        AutoModelForSeq2SeqLM.from_pretrained(checkpoint),
    )


# Module-level names kept as-is: llm_pipeline() reads them as globals.
tokenizer, base_model = _load_model(CHECKPOINT)
# File loader and processing
def file_preprocessing(file):
    """Extract the text of a PDF as a single string.

    The PDF is loaded with ``PyPDFLoader.load_and_split()``, the resulting
    documents are re-split with a ``RecursiveCharacterTextSplitter``
    (``chunk_size=300``, ``chunk_overlap=50``), and the ``page_content`` of
    every chunk is concatenated in order with no separator.

    Args:
        file: PDF location, passed unchanged to ``PyPDFLoader``.

    Returns:
        The concatenated chunk text; an empty string when no chunks are
        produced.
    """
    pages = PyPDFLoader(file).load_and_split()
    splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
    chunks = splitter.split_documents(pages)
    # NOTE(review): with a non-zero chunk_overlap, adjacent chunks may share
    # text, so the joined string can repeat fragments at chunk boundaries --
    # confirm whether that duplication is intended for the summarizer input.
    # str.join builds the result in one pass instead of repeated `+=`.
    return "".join(chunk.page_content for chunk in chunks)
# LLM pipeline for summarization
def llm_pipeline(input_text):
    """Summarize *input_text* with the module-level model and tokenizer.

    A ``'summarization'`` pipeline is built from ``base_model`` and
    ``tokenizer`` with ``max_length=500`` and ``min_length=50``, then applied
    to the text.

    Args:
        input_text: Text handed to the summarization pipeline.

    Returns:
        The ``'summary_text'`` value of the first pipeline result.
    """
    length_limits = {"max_length": 500, "min_length": 50}
    summarizer = pipeline(
        'summarization',
        model=base_model,
        tokenizer=tokenizer,
        **length_limits,
    )
    outputs = summarizer(input_text)
    first_output = outputs[0]
    return first_output['summary_text']
# Function to display the PDF file
def displayPDF(file_path):
    """Render the PDF at *file_path* inline in the current Streamlit container.

    The file is read in binary mode, base64-encoded, and embedded as a
    ``data:`` URI inside an ``<iframe>`` emitted through ``st.markdown``.

    Deliberately not wrapped in ``st.cache_data``: the cache would be keyed on
    the path only (not the file's content) and is unbounded by default, so a
    caller passing a fresh temporary path on every run never gets a hit while
    each PDF's base64 markup is retained, and a reused path would show stale
    content.

    Args:
        file_path: Path of the PDF file to display.

    Returns:
        None. The iframe is written to the page as a side effect.
    """
    with open(file_path, "rb") as f:
        base64_pdf = base64.b64encode(f.read()).decode('utf-8')
    pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="100%" height="600" type="application/pdf"></iframe>'
    # Raw HTML is required for the iframe, hence unsafe_allow_html.
    st.markdown(pdf_display, unsafe_allow_html=True)
# Streamlit App
def main():
    """Build the Streamlit page: a PDF summarizer and a free-text summarizer.

    The PDF section shows a "Summarize PDF" button only once a file has been
    uploaded; the text section is always shown.
    """
    st.title('Content Summarizer')

    # PDF Upload Section
    uploaded_file = st.file_uploader("Upload your PDF file", type=['pdf'])
    # Short-circuit keeps the button hidden until a file is present.
    if uploaded_file is not None and st.button("Summarize PDF"):
        _summarize_uploaded_pdf(uploaded_file)

    # Text Input Section
    st.header("Summarize Your Text")
    user_input = st.text_area("Enter your content here:", height=200)
    if st.button("Summarize Text"):
        _summarize_text(user_input)


def _summarize_uploaded_pdf(uploaded_file):
    """Show the uploaded PDF beside its summary, then delete the temp copy.

    Args:
        uploaded_file: The object returned by ``st.file_uploader``.
    """
    col1, col2 = st.columns(2)
    # displayPDF() and file_preprocessing() both take a path, so the upload
    # is first persisted to disk. delete=False lets the file outlive the
    # `with` block; it is removed explicitly in the `finally` below.
    with NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
        # getvalue() returns the whole buffer regardless of the current
        # read position, unlike read().
        temp_file.write(uploaded_file.getvalue())
        temp_filepath = temp_file.name
    try:
        with col1:
            st.info("Uploaded PDF File")
            displayPDF(temp_filepath)
        with col2:
            st.info("Summarization")
            input_text = file_preprocessing(temp_filepath)
            summary = llm_pipeline(input_text)
            st.success(summary)
    finally:
        # Without this, every click would leave a PDF behind in the temp dir.
        os.remove(temp_filepath)


def _summarize_text(user_input):
    """Show *user_input* beside its summary, or warn when it is blank.

    Args:
        user_input: Raw contents of the text area.
    """
    if not user_input.strip():
        st.warning("Please enter some content to summarize.")
        return
    col1, col2 = st.columns(2)
    with col1:
        st.info("Original Content")
        st.write(user_input)
    with col2:
        st.info("Summarization")
        summary = llm_pipeline(user_input)
        st.success(summary)
# Entry point: build the page only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|