"""Streamlit app: generate multiple-choice questions from user-supplied text.

The script summarises the input text, extracts answer keywords, generates one
question per keyword, renders the questions with their options, and appends
every run to a tab-separated log file that can be downloaded from the page.
"""

import datetime
import html
import os
import random
import string
import sys
import traceback

import nltk
import numpy as np
import pandas as pd
import pke
import requests
import streamlit as st
import torch
from flashtext import KeywordProcessor
from nltk.corpus import stopwords
from nltk.corpus import wordnet as wn
from nltk.tokenize import sent_tokenize
from textwrap3 import wrap
from transformers import T5ForConditionalGeneration, T5Tokenizer

from helper import (
    get_final_option_list,
    get_keywords,
    get_nouns_multipartite,
    get_question,
    get_related_word,
    load_raw_text,
    postprocesstext,
    summarizer,
)


def set_seed(seed: int):
    """Seed the Python, NumPy and torch (CPU and CUDA) random generators."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)


set_seed(42)


@st.cache(allow_output_mutation=True)
def load_model():
    """Download the NLTK resources and load the summary and question models.

    The model names are read from the ``summary_mod_name`` and
    ``question_mod_name`` environment variables; a missing variable raises
    ``KeyError``. Both models are moved to CUDA when it is available,
    otherwise they stay on the CPU.

    Returns:
        Tuple ``(summary_model, summary_tokenizer, question_tokenizer,
        question_model)`` -- note the tokenizer/model order of the last pair.
    """
    for resource in ("punkt", "brown", "wordnet", "stopwords", "omw-1.4"):
        nltk.download(resource)

    summary_mod_name = os.environ["summary_mod_name"]
    question_mod_name = os.environ["question_mod_name"]
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    summary_model = T5ForConditionalGeneration.from_pretrained(summary_mod_name)
    summary_tokenizer = T5Tokenizer.from_pretrained(summary_mod_name)
    summary_model = summary_model.to(device)

    question_model = T5ForConditionalGeneration.from_pretrained(question_mod_name)
    question_tokenizer = T5Tokenizer.from_pretrained(question_mod_name)
    question_model = question_model.to(device)

    return summary_model, summary_tokenizer, question_tokenizer, question_model


def csv_downloader(df):
    """Show a download button that serves ``df`` as tab-separated UTF-8 text."""
    res = df.to_csv(index=False, sep="\t").encode("utf-8")
    st.download_button(
        label="Download logs data as CSV separated by tab",
        data=res,
        file_name="df_quiz_log_file_v1.csv",
        mime="text/csv",
    )


def load_file():
    """Load text from an uploaded ``.txt`` file.

    Returns:
        The decoded file content, or ``None`` when nothing has been uploaded
        or the upload is not ``text/plain``.
    """
    uploaded_file = st.file_uploader("Upload Files", type=["txt"])
    if uploaded_file is not None and uploaded_file.type == "text/plain":
        return str(uploaded_file.read(), "utf-8")
    return None


def _render_question_html(number, question, options, answer_index):
    """Build the HTML snippet for one question and its four options.

    All interpolated text is HTML-escaped because the snippet is rendered with
    ``unsafe_allow_html=True`` and the text originates from user input and
    model output.
    """
    # NOTE(review): the exact markup of the original template could not be
    # recovered; only the fact that the option at ``answer_index`` is rendered
    # differently from the others is preserved. Confirm the desired styling.
    parts = [f"<div><p>{number}: <b>{html.escape(str(question))}</b></p></div>"]
    for position, option in enumerate(options[:4]):
        text = html.escape(str(option))
        if position == answer_index:
            parts.append(f'<p style="color:green;"><b>{text}</b></p>')
        else:
            parts.append(f"<p>{text}</p>")
    return "\n".join(parts)


st.markdown('![Visitor count](https://shields-io-visitor-counter.herokuapp.com/badge?page=https://share.streamlit.io/https://huggingface.co/spaces/aakashgoel12/getmcq&label=VisitorsCount&labelColor=000000&logo=GitHub&logoColor=FFFFFF&color=1D70B8&style=for-the-badge)')

# Loading Model
summary_model, summary_tokenizer, question_tokenizer, question_model = load_model()

# App title and description
st.title("Exam Assistant")
st.write("Upload text, Get ready for answering autogenerated questions")
st.text("Disclaimer: This app stores user's input for model improvement purposes !!")

# Input text (load_file() is an alternative, currently unused, input path)
default_text = load_raw_text()
raw_text = st.text_area(
    "Enter text here",
    default_text,
    height=250,
    max_chars=1000000,
)

start_time = str(datetime.datetime.now())

if raw_text:
    summary_text = summarizer(raw_text, summary_model, summary_tokenizer)
    ans_list = get_keywords(raw_text, summary_text)

    # One entry per answer in every list so the log columns stay aligned.
    questions = []
    option1 = []
    option2 = []
    option3 = []
    option4 = []
    # Questions already rendered; duplicates are logged but shown only once.
    shown_questions = set()

    for idx, ans in enumerate(ans_list):
        ques = get_question(summary_text, ans, question_model, question_tokenizer)
        other_options = get_related_word(ans)
        final_options, ans_index = get_final_option_list(ans, other_options)

        questions.append(ques)
        option1.append(final_options[0])
        option2.append(final_options[1])
        option3.append(final_options[2])
        option4.append(final_options[3])

        if ques not in shown_questions:
            shown_questions.add(ques)
            html_str = _render_question_html(idx + 1, ques, final_options, ans_index)
            st.markdown(html_str, unsafe_allow_html=True)
            st.markdown("-----")

    output_path = "results/df_quiz_log_file_v1.csv"
    res_df = pd.DataFrame(
        {
            "TimeStamp": [start_time] * len(ans_list),
            "Input": [str(raw_text)] * len(ans_list),
            "Question": questions,
            "Option1": option1,
            "Option2": option2,
            "Option3": option3,
            "Option4": option4,
            "Correct Answer": ans_list,
        }
    )

    # Write the header only when the log file is being created.
    write_header = not os.path.exists(output_path)
    # Appending fails if the target directory is missing, so create it first.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    res_df.to_csv(output_path, mode="a", index=False, sep="\t", header=write_header)

    csv_downloader(pd.read_csv(output_path, sep="\t"))