Sifal committed on
Commit 5813d56
Parent: 36d6903

Update app.py

Files changed (1)
app.py +5 -1
app.py CHANGED
@@ -209,6 +209,8 @@ def beam_search_decode(model, src, src_mask, max_len, start_symbol, beam_size, length_penalty):
 
 def translate(src_sentence: str, strategy: str = 'greedy', lenght_extend: int = 5, beam_size: int = 5, length_penalty: float = 0.6):
     assert strategy in ['greedy', 'beam search'], 'the strategy for decoding has to be either greedy or beam search'
+    assert lenght_extend >= 1, 'lenght_extend must be greater than or equal to one'
+
     # Tokenize the source sentence
     src = source_tokenizer(src_sentence, **token_config)['input_ids']
     num_tokens = src.shape[1]
@@ -218,7 +220,9 @@ def translate(src_sentence: str, strategy: str = 'greedy', lenght_extend: int = 5, beam_size: int = 5, length_penalty: float = 0.6):
         tgt_tokens = greedy_decode(model, src, src_mask, max_len=num_tokens + lenght_extend, start_symbol=target_tokenizer.bos_token_id).flatten()
     # Generate the target tokens using beam search decoding
     else:
-        tgt_tokens = beam_search_decode(model, src, src_mask, max_len=num_tokens + lenght_extend, start_symbol=target_tokenizer.bos_token_id, beam_size=beam_size, length_penalty=length_penalty).flatten()
+        assert length_penalty >= 0, 'length penalty must be greater than or equal to zero'
+        assert beam_size >= 1, 'beam size must be greater than or equal to one'
+        tgt_tokens = beam_search_decode(model, src, src_mask, max_len=num_tokens + lenght_extend, start_symbol=target_tokenizer.bos_token_id, beam_size=beam_size, length_penalty=length_penalty).flatten()
     # Decode the target tokens and clean up the result
     return target_tokenizer.decode(tgt_tokens, clean_up_tokenization_spaces=True, skip_special_tokens=True)
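
For reference, a minimal usage sketch of the updated translate() signature with both decoding strategies. It assumes app.py's model and tokenizers are already loaded at import time, as the surrounding code suggests; the import path and the example sentence are illustrative only, not part of the commit.

# Hypothetical usage of translate() as defined in app.py after this commit.
from app import translate  # assumed import path; translate lives in app.py

sentence = "This is a test sentence."  # illustrative source sentence

# Greedy decoding: generates at most num_tokens + lenght_extend target tokens.
print(translate(sentence, strategy='greedy', lenght_extend=5))

# Beam search decoding: the new asserts require beam_size >= 1 and length_penalty >= 0.
print(translate(sentence, strategy='beam search', beam_size=5, length_penalty=0.6))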