DevBM's picture
Rename b.py to app.py
6b8a350 verified
raw
history blame
1.39 kB
import streamlit as st
from transformers import pipeline
from PyPDF2 import PdfReader
import PyPDF2
import fitz
import os
import nltk
def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every given PDF.

    Args:
        pdf_docs: Iterable of PDF sources; each item is handed unchanged to
            ``PdfReader``. ``None`` is treated like an empty iterable.

    Returns:
        One string holding the page texts in document order, then page
        order, joined without any separator. Returns ``""`` when there is
        nothing to read.
    """
    page_texts = []
    for pdf in pdf_docs or ():
        pdf_reader = PdfReader(pdf)
        # Fall back to "" so a page that yields no text cannot break the join.
        page_texts.extend(
            page.extract_text() or "" for page in pdf_reader.pages
        )
    # Join once at the end instead of growing a string with repeated +=.
    return "".join(page_texts)
def _build_question_pipeline():
    """Create the text2text-generation pipeline for 'ramsrigouthamg/t5_squad_v1'."""
    return pipeline(
        task='text2text-generation',
        model='ramsrigouthamg/t5_squad_v1',
    )


def main():
    """Render the Streamlit page: upload PDFs, generate and show questions.

    Pressing 'Start' extracts the text of the uploaded files, splits it into
    sentences with NLTK and feeds the sentences to the text2text pipeline.
    The pipeline output is kept in ``st.session_state`` so that it is still
    displayed when a later widget interaction (such as one of the toggles
    below) re-runs the script.
    """
    st.title('Question Generator from PDFs')

    # Streamlit re-executes this script on every widget interaction; wrapping
    # the builder in cache_resource reuses one pipeline instead of rebuilding
    # it on each rerun.
    pipe = st.cache_resource(_build_question_pipeline)()

    file = st.file_uploader(label='Upload', accept_multiple_files=True)
    pr = st.button(label='Start')

    if pr:
        st.write('Hi')
        raw_text = get_pdf_text(file)
        # NOTE(review): sent_tokenize presumably needs NLTK's punkt tokenizer
        # data to be installed in the runtime environment - confirm.
        sentences = nltk.sent_tokenize(text=raw_text)
        if sentences:
            st.session_state['pipeline_output'] = pipe(sentences)
        else:
            # Nothing to generate from: drop stale results and tell the user
            # instead of calling the pipeline on an empty list.
            if 'pipeline_output' in st.session_state:
                del st.session_state['pipeline_output']
            st.warning('No text found in the uploaded files.')

    # st.button is True only for the run triggered by the click, so the
    # results are read back from session state rather than rendered under
    # `if pr:` (where flipping a toggle would make them disappear).
    s = st.session_state.get('pipeline_output')
    if s is None:
        return

    st.subheader("Generated Questions are: ")
    # The first 10 characters of each generated string are dropped -
    # presumably the model's "question: " prefix; TODO confirm.
    questions = [item['generated_text'][10:] for item in s]
    for question in questions:
        st.write(question)

    if st.toggle(label='Show Pipeline Output'):
        st.write(s)
    if st.toggle(label='Show Questions list'):
        st.write(questions)
# Script entry point: build the page only when this file is executed as the
# main module, not when it is imported.
if __name__ == "__main__":
    main()