File size: 2,482 Bytes
05942ee
 
 
 
558653a
05942ee
 
 
 
 
 
 
 
 
558653a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
05942ee
 
 
 
 
 
 
 
 
 
 
558653a
 
 
 
 
 
 
 
 
05942ee
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import streamlit as st
from transformers import AutoTokenizer,AutoModelForSeq2SeqLM

@st.cache(persist=True)
def load_model(input_complex_sentence, model, tokenizer):
	"""Generate the model's output text for a single input sentence.

	Despite its name, this function does not load anything: it tokenizes
	the input, runs generation with the given model and decodes the first
	returned sequence. The ``st.cache(persist=True)`` decorator wraps the
	whole call, so all three arguments take part in the cache lookup.

	Args:
		input_complex_sentence: Text passed to ``tokenizer``.
		model: Object whose ``generate`` method is called with the encoded
			input ids, the attention mask, ``max_length=256`` and
			``num_beams=5``.
		tokenizer: Callable producing ``input_ids`` and ``attention_mask``
			entries (requested with ``return_tensors="pt"``); its
			``decode`` method turns generated ids back into text.

	Returns:
		The decoded first generated sequence, with special tokens skipped.
	"""
	encoded = tokenizer(input_complex_sentence, return_tensors="pt")
	input_ids = encoded['input_ids']
	attention_mask = encoded['attention_mask']

	output_ids = model.generate(
		input_ids,
		attention_mask=attention_mask,
		max_length=256,
		num_beams=5,
	)

	# Only the first returned sequence is decoded and handed back.
	return tokenizer.decode(output_ids[0], skip_special_tokens=True)

# Sidebar label -> Hugging Face Hub repository id, in the order the labels
# are offered to the user.
_MODEL_REPOS = {
	"t5-base-wikisplit": "flax-community/t5-base-wikisplit",
	"t5-v1_1-base-wikisplit": "flax-community/t5-v1_1-base-wikisplit",
	"byt5-base-wikisplit": "flax-community/byt5-base-wikisplit",
	"t5-large-wikisplit": "flax-community/t5-large-wikisplit",
}


@st.cache(allow_output_mutation=True)
def _load_tokenizer_and_model(repo_id):
	"""Load the tokenizer and seq2seq model for ``repo_id``, once per id.

	The result is cached by Streamlit so that script reruns reuse the
	already-loaded objects. ``allow_output_mutation=True`` stops Streamlit
	from hashing the returned objects to detect mutation.

	Args:
		repo_id: Identifier passed to ``from_pretrained``.

	Returns:
		A ``(tokenizer, model)`` tuple.
	"""
	tokenizer = AutoTokenizer.from_pretrained(repo_id)
	model = AutoModelForSeq2SeqLM.from_pretrained(repo_id)
	return tokenizer, model


def main():
	"""Render the sentence-splitting page and run the selected model.

	The page shows a title, a sidebar model selector and a text area
	pre-filled with an example sentence. When the 'Simplify' button is
	pressed, the selected model generates the split sentence, which is
	written to the page.

	Streamlit re-executes the script on every widget interaction, so the
	checkpoints are not loaded eagerly here. Only the selected one is
	loaded, on demand, through the cached ``_load_tokenizer_and_model``.
	"""
	st.title("✂️ Sentence Split in English using T5 variants")
	st.write("Sentence Split is the task of dividing a long Sentence into multiple Sentences")

	model = st.sidebar.selectbox(
		"Please Choose the Model",
		tuple(_MODEL_REPOS),
	)
	st.write("Model Selected : ", model)

	example = "Mary likes to play football in her freetime whenever she meets with her friends that are very nice people."
	input_complex_sentence = st.text_area("Please type a long Sentence to split", example)

	if st.button('Simplify'):
		# Load lazily: only the chosen checkpoint, and only when it is needed.
		tokenizer, seq2seq_model = _load_tokenizer_and_model(_MODEL_REPOS[model])
		generated_sentence = load_model(input_complex_sentence, seq2seq_model, tokenizer)
		st.write(generated_sentence)


# Start the app only when this file is executed as the main script,
# not when it is imported as a module.
if __name__ == "__main__":
	main()