DevBM's picture
Update app.py
0150ad9 verified
raw
history blame
1.35 kB
import streamlit as st
from transformers import pipeline
from PyPDF2 import PdfReader
import PyPDF2
import os
import nltk
# Fetch the sentence-tokenizer data that nltk.sent_tokenize() needs at runtime.
# Older NLTK releases load the "punkt" resource; NLTK 3.9 onward looks for
# "punkt_tab" instead, so both are requested to keep the app working on
# either version. nltk.download() skips resources that are already up to date.
nltk.download('punkt')
nltk.download('punkt_tab')
def get_pdf_text(pdf_docs):
    """Return the text of every page of every PDF in ``pdf_docs``, concatenated.

    Args:
        pdf_docs: Iterable of PDF sources that ``PdfReader`` can open
            (``main()`` passes the value returned by ``st.file_uploader``).
            ``None`` or an empty collection is treated as "no documents".

    Returns:
        One string containing the page texts in document order, then page
        order, joined without any separator. Returns ``""`` when there is
        nothing to read.
    """
    if not pdf_docs:
        return ""

    # Collect the pieces and join once instead of growing a string with +=.
    page_texts = []
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # Defensive: a page that yields no text (None or "") contributes
            # nothing instead of raising TypeError during concatenation.
            page_texts.append(page.extract_text() or "")
    return "".join(page_texts)
# st.session_state slot holding the raw pipeline output of the last "Start" run.
_RESULTS_KEY = 'generated_question_outputs'

# Number of leading characters dropped from every generated string.
# NOTE(review): 10 == len("question: "), presumably the prefix the model
# emits before each question -- confirm against real pipeline output.
_GENERATED_PREFIX_LEN = 10


def _build_question_pipeline():
    """Create the text2text-generation pipeline used to produce questions."""
    return pipeline(
        task='text2text-generation',
        model='ramsrigouthamg/t5_squad_v1',
    )


def main():
    """Render the Streamlit page: upload PDFs, generate and show questions.

    Flow:
      1. The user uploads one or more files and presses "Start".
      2. The text of the uploads is split into sentences and each sentence is
         fed to the question-generation pipeline.
      3. The pipeline output is stored in ``st.session_state`` and rendered,
         together with two toggles exposing the raw output and the list of
         questions.

    Streamlit re-executes the script on every widget interaction, and
    ``st.button`` is only true on the run triggered by the click. The results
    are therefore kept in session state and rendered outside the button
    branch; otherwise flipping a toggle would trigger a rerun in which the
    button is false and all output (including the toggles) would disappear.
    Results from the last "Start" run stay on screen until the next one.
    """
    st.title('Question Generator from PDFs')
    uploaded_files = st.file_uploader(label='Upload', accept_multiple_files=True)

    if st.button(label='Start'):
        # NOTE(review): looks like a leftover debug greeting; kept so the
        # visible output on click is unchanged.
        st.write('Hi')
        raw_text = get_pdf_text(uploaded_files)
        sentences = nltk.sent_tokenize(text=raw_text)
        if sentences:
            # Cache the pipeline across reruns so the model is not rebuilt
            # on every interaction. The wrapper is applied here rather than
            # as a module-level decorator; the script body is re-executed on
            # each rerun either way.
            load_pipeline = st.cache_resource(_build_question_pipeline)
            st.session_state[_RESULTS_KEY] = load_pipeline()(sentences)
        else:
            # Nothing to generate from: drop stale results and tell the user
            # instead of calling the pipeline with an empty batch.
            if _RESULTS_KEY in st.session_state:
                del st.session_state[_RESULTS_KEY]
            st.warning('No text could be extracted from the uploaded files.')

    outputs = st.session_state.get(_RESULTS_KEY)
    if not outputs:
        return

    # Each pipeline result is a mapping with a 'generated_text' entry.
    questions = [
        item['generated_text'][_GENERATED_PREFIX_LEN:] for item in outputs
    ]

    st.subheader("Generated Questions are: ")
    for question in questions:
        st.write(question)

    if st.toggle(label='Show Pipeline Output'):
        st.write(outputs)
    if st.toggle(label='Show Questions list'):
        st.write(questions)
# Script entry point: build the page only when this file is executed as the
# main module, not when it is imported.
if __name__ == '__main__':
    main()