# Spaces: Sleeping
# (Hosting-page status text captured together with the source; not part of the program.)
import streamlit as st
from transformers import pipeline
from PyPDF2 import PdfReader
import nltk
import pandas as pd

# Sentence-tokenizer data used by nltk.sent_tokenize further down.
# 'punkt' is what this script has always fetched; newer NLTK releases load
# the tokenizer from the 'punkt_tab' resource instead, so both are requested
# to keep sent_tokenize working regardless of the installed NLTK version.
for _nltk_resource in ('punkt', 'punkt_tab'):
    nltk.download(_nltk_resource)

st.title(body='7 - Question Generation')
def get_pdf_text(pdf_docs):
    """Return the text of every page of every given PDF as one string.

    Args:
        pdf_docs: Iterable of PDF sources, each passed unchanged to
            ``PdfReader``. ``None`` or an empty iterable means "no
            documents".

    Returns:
        The page texts concatenated in document/page order with no
        separator; ``""`` when there is nothing to read.
    """
    page_texts = []
    for pdf in pdf_docs or ():
        pdf_reader = PdfReader(pdf)
        # Defensive: substitute "" if a page yields no text object, so the
        # final join cannot fail on a non-string value.
        page_texts.extend(
            page.extract_text() or "" for page in pdf_reader.pages
        )
    return "".join(page_texts)
########################################################
# Proposition 1: generate questions from uploaded PDFs.
########################################################


@st.cache_resource
def _load_question_pipeline():
    """Build the text2text-generation pipeline once and reuse it.

    Streamlit re-executes the script on every widget interaction; caching
    the pipeline avoids constructing it again on each rerun.
    """
    return pipeline(
        task='text2text-generation',
        model='ramsrigouthamg/t5_squad_v1',
    )


st.subheader(body='Proposition 1', divider='orange')
if st.toggle(label='Show Proposition 1'):
    st.title('Question Generator from PDFs')
    if st.checkbox('Show Caption'):
        st.caption('Hugging Face Model used: ramsrigouthamg/t5_squad_v1')

    pipe = _load_question_pipeline()

    file = st.file_uploader(label='Upload', accept_multiple_files=True)
    raw_text = get_pdf_text(file)
    sentences = nltk.sent_tokenize(text=raw_text)

    # Nothing uploaded (or no extractable text) -> no sentences; skip the
    # model call instead of invoking the pipeline on an empty batch.
    s = pipe(sentences) if sentences else []

    # The first 10 characters of each generated string are dropped --
    # presumably the model's "question: " prefix; confirm against real
    # model output before changing the offset.
    questions = [i['generated_text'][10:] for i in s]

    if st.toggle('Show Text'):
        st.write(raw_text)

    if st.toggle(label='Show Questions'):
        st.subheader("*Generated Questions are:*")
        for i in questions:
            st.write(f':blue[{i}]')

    if st.toggle(label='Show Pipeline Output'):
        st.write(s)

    if st.toggle(label='Show Questions list'):
        st.write(questions)

    # Offer the CSV export only when there is something to export.
    if questions:
        df = pd.DataFrame(questions, columns=["Question"])
        csv = df.to_csv(index=False).encode('utf-8')
        st.download_button(
            label="Download Questions as CSV",
            data=csv,
            file_name='questions.csv',
            mime='text/csv',
        )