Spaces:
Running
Running
mishrasahil934
committed on
Create app.py
Browse files
app.py
CHANGED
@@ -2,23 +2,22 @@ import os
|
|
2 |
import base64
|
3 |
import tempfile
|
4 |
import streamlit as st
|
5 |
-
|
6 |
-
from PyPDF2 import PdfReader
|
7 |
|
|
|
8 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
9 |
|
10 |
# Load the summarization model
|
11 |
tokenizer = AutoTokenizer.from_pretrained("MBZUAI/LaMini-Flan-T5-248M")
|
12 |
base_model = AutoModelForSeq2SeqLM.from_pretrained("MBZUAI/LaMini-Flan-T5-248M")
|
13 |
|
14 |
-
# Function to extract text from a PDF using PyPDF2
|
15 |
def extract_text_from_pdf(pdf_path):
|
16 |
-
reader = PdfReader(pdf_path)
|
17 |
text = ""
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
if text.strip():
|
23 |
return text
|
24 |
return None
|
@@ -45,7 +44,7 @@ def displayPDF(file_path):
|
|
45 |
|
46 |
# Streamlit App
|
47 |
def main():
|
48 |
-
st.title('
|
49 |
|
50 |
# PDF Upload Section
|
51 |
uploaded_file = st.file_uploader("Upload your PDF file", type=['pdf'])
|
|
|
2 |
import base64
|
3 |
import tempfile
|
4 |
import streamlit as st
|
5 |
+
import fitz # PyMuPDF
|
|
|
6 |
|
7 |
+
from transformers import pipeline
|
8 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
9 |
|
10 |
# Load the summarization model
|
11 |
tokenizer = AutoTokenizer.from_pretrained("MBZUAI/LaMini-Flan-T5-248M")
|
12 |
base_model = AutoModelForSeq2SeqLM.from_pretrained("MBZUAI/LaMini-Flan-T5-248M")
|
13 |
|
14 |
+
# Function to extract text from a PDF using PyMuPDF
|
15 |
def extract_text_from_pdf(pdf_path):
    """Extract the text of every page of a PDF using PyMuPDF.

    Args:
        pdf_path: Path of the PDF file to open.

    Returns:
        The concatenated text of all pages, in page order, or ``None``
        when the document contains no non-whitespace text.
    """
    # Open the document in a ``with`` block so the underlying file handle
    # is released even if text extraction fails on one of the pages.
    with fitz.open(pdf_path) as doc:
        text = "".join(page.get_text() for page in doc)
    if text.strip():
        return text
    return None
|
|
|
44 |
|
45 |
# Streamlit App
|
46 |
def main():
|
47 |
+
st.title('Content Summarizer')
|
48 |
|
49 |
# PDF Upload Section
|
50 |
uploaded_file = st.file_uploader("Upload your PDF file", type=['pdf'])
|