Spaces:
Sleeping
Sleeping
File size: 2,946 Bytes
f2c8e06 421cd7c f2c8e06 e6a9ac6 f2c8e06 e6a9ac6 f2c8e06 421cd7c f2c8e06 e6a9ac6 421cd7c 668f0b8 e6a9ac6 668f0b8 421cd7c 432a8d9 421cd7c 432a8d9 421cd7c f2c8e06 421cd7c 668f0b8 9dca518 421cd7c f2c8e06 e6a9ac6 f2c8e06 421cd7c f2c8e06 421cd7c f2c8e06 421cd7c f2c8e06 421cd7c f2c8e06 421cd7c f2c8e06 421cd7c 432a8d9 421cd7c 432a8d9 421cd7c f2c8e06 421cd7c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
import os
import base64
import tempfile
import streamlit as st
import fitz # PyMuPDF
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# Load the summarization model
MODEL_NAME = "MBZUAI/LaMini-Flan-T5-248M"


@st.cache_resource
def _load_summarizer(model_name):
    """Load the tokenizer and seq2seq model for ``model_name`` once per process.

    Streamlit re-executes this script on every widget interaction; caching the
    loaded objects as a resource keeps ``from_pretrained`` from running again
    on each rerun.

    Args:
        model_name: Model identifier passed to both ``from_pretrained`` calls.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    return (
        AutoTokenizer.from_pretrained(model_name),
        AutoModelForSeq2SeqLM.from_pretrained(model_name),
    )


# Module-level names used by the summarization pipeline below.
tokenizer, base_model = _load_summarizer(MODEL_NAME)
# Function to extract text from a PDF using PyMuPDF
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of a PDF, or ``None``.

    Args:
        pdf_path: Path of the PDF file, passed to ``fitz.open``.

    Returns:
        The text of all pages joined in page order, or ``None`` when the
        extracted text is empty or whitespace-only.
    """
    # The context manager closes the document even if extraction raises,
    # so the underlying file handle is not leaked.
    with fitz.open(pdf_path) as doc:
        text = "".join(page.get_text() for page in doc)

    return text if text.strip() else None
# LLM pipeline for summarization
def llm_pipeline(input_text):
    """Summarize ``input_text`` with the module-level model and tokenizer.

    Args:
        input_text: The text handed to the summarization pipeline.

    Returns:
        The ``'summary_text'`` field of the first pipeline result.
    """
    # Length bounds supplied to the pipeline at construction time.
    length_bounds = {"max_length": 500, "min_length": 50}

    summarizer = pipeline(
        'summarization',
        tokenizer=tokenizer,
        model=base_model,
        **length_bounds,
    )

    outputs = summarizer(input_text)
    first_output = outputs[0]
    return first_output['summary_text']
@st.cache_data
def displayPDF(file_path):
    """Display the PDF at ``file_path`` inside an iframe on the page.

    The file is read as bytes, base64-encoded, and embedded as a
    ``data:application/pdf`` URI in raw HTML rendered through ``st.markdown``.

    Args:
        file_path: Path of the PDF file to read and embed.
    """
    with open(file_path, "rb") as pdf_file:
        raw_bytes = pdf_file.read()

    base64_pdf = base64.b64encode(raw_bytes).decode('utf-8')
    pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="100%" height="600" type="application/pdf"></iframe>'

    # Raw HTML is required for the iframe, hence unsafe_allow_html.
    st.markdown(pdf_display, unsafe_allow_html=True)
# Streamlit App
def main():
    """Render the summarizer page: a PDF upload section and a free-text section.

    PDF section: once a file is uploaded and "Summarize PDF" is clicked, the
    upload is written to a temporary file, shown in the left column, and its
    extracted text is summarized in the right column. The temporary file is
    removed afterwards.

    Text section: when "Summarize Text" is clicked, the entered text is shown
    in the left column and its summary in the right column; blank input
    produces a warning instead.
    """
    st.title('Content Summarizer')

    # PDF Upload Section
    uploaded_file = st.file_uploader("Upload your PDF file", type=['pdf'])
    # Short-circuit keeps the button hidden until a file has been uploaded.
    if uploaded_file is not None and st.button("Summarize PDF"):
        col1, col2 = st.columns(2)

        # Both displayPDF and extract_text_from_pdf take a path, so the
        # upload is spooled to disk. No explicit dir: the platform's default
        # temp directory is used instead of a hard-coded "/tmp/".
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
        temp_filepath = temp_file.name
        try:
            with temp_file:
                # getvalue() returns the whole payload regardless of the
                # stream position left behind by any earlier read.
                temp_file.write(uploaded_file.getvalue())

            with col1:
                st.info("Uploaded PDF File")
                displayPDF(temp_filepath)

            with col2:
                st.info("Summarization")
                input_text = extract_text_from_pdf(temp_filepath)
                if input_text:  # Proceed only if text extraction was successful
                    summary = llm_pipeline(input_text)
                    st.success(summary)
                else:
                    # Tell the user why no summary appeared.
                    st.warning("No extractable text was found in this PDF.")
        finally:
            # delete=False means nothing else removes the file. Cleanup is
            # best-effort: a file still held open elsewhere (e.g. on Windows)
            # must not turn a successful summary into an error.
            try:
                os.remove(temp_filepath)
            except OSError:
                pass

    # Text Input Section
    st.header("Summarize Your Text")
    user_input = st.text_area("Enter your content here:", height=200)
    if st.button("Summarize Text"):
        if user_input.strip():
            col1, col2 = st.columns(2)
            with col1:
                st.info("Original Content")
                st.write(user_input)
            with col2:
                st.info("Summarization")
                summary = llm_pipeline(user_input)
                st.success(summary)
        else:
            st.warning("Please enter some content to summarize.")
# Start the app only when this file is executed as the main script.
if __name__ == "__main__":
    main()
|