"""Streamlit app that generates questions from the text of uploaded PDFs."""

import os

import fitz
import nltk
import PyPDF2
import streamlit as st
from PyPDF2 import PdfReader
from transformers import pipeline

# Session-state key under which the latest pipeline output is kept so that it
# survives the reruns Streamlit performs on every widget interaction.
_RESULTS_KEY = "question_pipeline_output"

# The original code dropped the first 10 characters of every generated text,
# which is exactly len("question: ") -- presumably the prefix the model emits.
# NOTE(review): confirm against the model card.
_QUESTION_PREFIX = "question: "


def get_pdf_text(pdf_docs):
    """Return the text extracted from every page of every PDF in *pdf_docs*.

    Args:
        pdf_docs: Iterable of paths or binary file-like objects accepted by
            ``PdfReader``. ``None`` or an empty iterable yields ``""``.

    Returns:
        The page texts joined with newlines, so the last word of one page is
        not fused with the first word of the next.
    """
    page_texts = []
    for pdf in pdf_docs or ():
        for page in PdfReader(pdf).pages:
            # Guard against pages that yield no text (None / empty string).
            page_texts.append(page.extract_text() or "")
    return "\n".join(page_texts)


@st.cache_resource
def _load_pipeline():
    """Build the question-generation pipeline once per server process."""
    return pipeline(
        task='text2text-generation',
        model='ramsrigouthamg/t5_squad_v1',
    )


def _split_sentences(text):
    """Split *text* into non-blank sentences using NLTK's sentence tokenizer."""
    try:
        sentences = nltk.sent_tokenize(text=text)
    except LookupError:
        # Tokenizer data is not installed; fetch it once and retry. Both names
        # are requested because the resource id differs across NLTK releases.
        nltk.download('punkt', quiet=True)
        nltk.download('punkt_tab', quiet=True)
        sentences = nltk.sent_tokenize(text=text)
    return [sentence for sentence in sentences if sentence.strip()]


def _strip_question_prefix(generated):
    """Remove the leading ``"question: "`` marker from *generated*, if present."""
    if generated.startswith(_QUESTION_PREFIX):
        return generated[len(_QUESTION_PREFIX):]
    return generated


def main():
    """Render the app: upload PDFs, generate questions, display the results."""
    st.title('Question Generator from PDFs')

    # Cached: without this the model would be rebuilt on every rerun.
    pipe = _load_pipeline()

    file = st.file_uploader(
        label='Upload',
        type=['pdf'],
        accept_multiple_files=True,
    )
    pr = st.button(label='Start')

    if pr:
        st.write('Hi')
        sentences = _split_sentences(get_pdf_text(file))
        if sentences:
            st.session_state[_RESULTS_KEY] = pipe(sentences)
        else:
            # Nothing to feed the model; drop any stale results as well.
            st.session_state.pop(_RESULTS_KEY, None)
            st.warning('No text could be extracted from the uploaded files.')

    # Render from session state rather than inside ``if pr:``: the button is
    # only True for the single run right after the click, so flipping a toggle
    # (which triggers a rerun) would otherwise make the results disappear.
    outputs = st.session_state.get(_RESULTS_KEY)
    if not outputs:
        return

    questions = [
        _strip_question_prefix(item['generated_text']) for item in outputs
    ]

    st.subheader("Generated Questions are: ")
    for question in questions:
        st.write(question)

    if st.toggle(label='Show Pipeline Output'):
        st.write(outputs)
    if st.toggle(label='Show Questions list'):
        st.write(questions)


if __name__ == '__main__':
    main()