Spaces:
Sleeping
Sleeping
import streamlit as st | |
from transformers import pipeline | |
from PyPDF2 import PdfReader | |
import PyPDF2 | |
import os | |
import nltk | |
nltk.download('punkt') | |
def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every given PDF.

    Args:
        pdf_docs: Iterable of PDF sources that ``PdfReader`` can open (for
            example the uploaded-file objects returned by
            ``st.file_uploader``). ``None`` or an empty iterable is accepted
            and produces an empty string.

    Returns:
        One ``str`` holding the page texts in document order, then page
        order. No separator is inserted between pages or documents.
    """
    if not pdf_docs:
        return ""

    page_texts = []
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # Treat a falsy extraction result (empty string or None) as "no
            # text" so that building the final string can never raise
            # TypeError on a page without extractable text.
            page_texts.append(page.extract_text() or "")

    # Join once at the end instead of growing a string with repeated `+=`.
    return "".join(page_texts)
# Session-state slot holding the raw pipeline output of the last successful run.
_RESULTS_KEY = 'question_pipeline_output'

# Marker stripped from the front of each generated text before display.
_QUESTION_PREFIX = 'question: '


def _load_pipeline():
    """Build the text2text-generation pipeline used to produce questions."""
    return pipeline(
        task='text2text-generation',
        model='ramsrigouthamg/t5_squad_v1',
    )


def _strip_question_prefix(generated_text):
    """Return ``generated_text`` without a leading ``'question: '`` marker."""
    # The previous code unconditionally dropped the first 10 characters, which
    # is exactly len('question: '). Only strip when the marker is really
    # there so that other outputs are not truncated.
    # NOTE(review): presumably the model prefixes its output with this marker;
    # confirm against real model output.
    if generated_text.startswith(_QUESTION_PREFIX):
        return generated_text[len(_QUESTION_PREFIX):]
    return generated_text


def main():
    """Render the Streamlit page that turns uploaded PDFs into questions.

    Flow:
        1. Load the generation pipeline (cached, so it is built only once).
        2. Let the user upload one or more files and press "Start".
        3. On "Start", extract the PDF text, split it into sentences with
           NLTK and feed the sentences to the pipeline.
        4. Show the generated questions, with optional toggles for the raw
           pipeline output and the plain list of questions.

    The pipeline output is kept in ``st.session_state`` because Streamlit
    re-executes the script on every widget interaction and ``st.button`` is
    only true for the run triggered by the click. Without that, flipping one
    of the toggles would make the whole results section disappear.
    """
    st.title('Question Generator from PDFs')

    # st.cache_resource memoizes the loaded model across reruns, so it is not
    # rebuilt each time the script is re-executed.
    pipe = st.cache_resource(_load_pipeline)()

    file = st.file_uploader(label='Upload', accept_multiple_files=True)
    pr = st.button(label='Start')

    if pr:
        st.write('Hi')
        # Drop results of an earlier run so stale questions are never shown
        # next to a failed or empty new run.
        st.session_state.pop(_RESULTS_KEY, None)
        if not file:
            st.warning('Please upload at least one PDF before pressing Start.')
        else:
            raw_text = get_pdf_text(file)
            sentences = nltk.sent_tokenize(text=raw_text)
            if sentences:
                st.session_state[_RESULTS_KEY] = pipe(sentences)
            else:
                st.warning('No text could be extracted from the uploaded files.')

    s = st.session_state.get(_RESULTS_KEY)
    if not s:
        # Nothing generated yet (or the last run produced nothing to show).
        return

    questions = [_strip_question_prefix(item['generated_text']) for item in s]

    st.subheader("Generated Questions are: ")
    for question in questions:
        st.write(question)

    if st.toggle(label='Show Pipeline Output'):
        st.write(s)
    if st.toggle(label='Show Questions list'):
        st.write(questions)
# Script entry point: start the app only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()