vishwask committed
Commit 4c32e5c · verified · 1 Parent(s): c411529

Update app.py

Files changed (1):
  1. app.py +18 -11
app.py CHANGED
@@ -35,21 +35,28 @@ def intitalize_lang(language):
     print("intitalize_lang"+lang_global)
 
 def english_to_indian(sentence):
-    print ("english_to_indian"+lang_global)
+    #print ("english_to_indian"+lang_global)
+    translated_sentence = ''
     translation_tokenizer.src_lang = "en_xx"
-    encoded_hi = translation_tokenizer(sentence, return_tensors="pt")
-    generated_tokens = translation_model.generate(**encoded_hi,
-                                                  forced_bos_token_id=translation_tokenizer.lang_code_to_id[lang_global] )
-    return (translation_tokenizer.batch_decode(generated_tokens, skip_special_tokens=True))
-
+    chunks = [sentence[i:i+500] for i in range(0, len(sentence), 500)]
+    for chunk in chunks:
+        encoded_hi = translation_tokenizer(chunk, return_tensors="pt")
+        generated_tokens = translation_model.generate(**encoded_hi,
+                                                      forced_bos_token_id=translation_tokenizer.lang_code_to_id[lang_global] )
+        x = translation_tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
+        translated_sentence = translated_sentence + x
+    return translated_sentence
 
 def indian_to_english(sentence):
+    translated_sentence = ''
     translation_tokenizer.src_lang = lang_global
-    encoded_hi = translation_tokenizer(sentence, return_tensors="pt")
-    generated_tokens = translation_model.generate(**encoded_hi,
-                                                  forced_bos_token_id=translation_tokenizer.lang_code_to_id["en_XX"] )
-    return (translation_tokenizer.batch_decode(generated_tokens, skip_special_tokens=True))
-
+    chunks = [sentence[i:i+500] for i in range(0, len(sentence), 500)]
+    for chunk in chunks:
+        encoded_hi = translation_tokenizer(chunk, return_tensors="pt")
+        generated_tokens = translation_model.generate(**encoded_hi, forced_bos_token_id=translation_tokenizer.lang_code_to_id["en_XX"] )
+        x = translation_tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
+        translated_sentence = translated_sentence + x
+    return translated_sentence
 
 
 llm_model = "mistralai/Mistral-7B-Instruct-v0.2"
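The updated english_to_indian / indian_to_english functions split long inputs into 500-character chunks and translate one chunk at a time. As committed, though, the accumulator line adds the list returned by batch_decode to a string (which raises a TypeError), and the source code "en_xx" differs in case from the "en_XX" used as a target code. Below is a minimal sketch of the same chunked-translation pattern with those two points addressed, assuming an mBART-50 checkpoint such as facebook/mbart-large-50-many-to-many-mmt; the model name and the translate_chunked helper are illustrative assumptions, since the diff does not show which translation model app.py actually loads.

# Minimal sketch of the chunked-translation pattern added in this commit,
# assuming an mBART-50 checkpoint (the specific model name below is an
# assumption; the diff does not show what app.py loads).
from transformers import MBart50TokenizerFast, MBartForConditionalGeneration

MODEL_NAME = "facebook/mbart-large-50-many-to-many-mmt"  # assumed checkpoint
translation_tokenizer = MBart50TokenizerFast.from_pretrained(MODEL_NAME)
translation_model = MBartForConditionalGeneration.from_pretrained(MODEL_NAME)

def translate_chunked(text, src_lang, tgt_lang, chunk_size=500):
    """Translate text in fixed-size character chunks, as the commit does."""
    translation_tokenizer.src_lang = src_lang  # mBART-50 codes, e.g. "en_XX", "hi_IN"
    pieces = []
    for i in range(0, len(text), chunk_size):
        chunk = text[i:i + chunk_size]
        encoded = translation_tokenizer(chunk, return_tensors="pt")
        generated = translation_model.generate(
            **encoded,
            forced_bos_token_id=translation_tokenizer.lang_code_to_id[tgt_lang],
        )
        # batch_decode returns a list of strings (one per sequence), so collect
        # the results in a list rather than adding the list to a string.
        pieces.extend(
            translation_tokenizer.batch_decode(generated, skip_special_tokens=True)
        )
    return " ".join(pieces)

# Usage mirroring english_to_indian / indian_to_english in app.py:
# translate_chunked("How are you?", src_lang="en_XX", tgt_lang="hi_IN")
# translate_chunked("आप कैसे हैं?", src_lang="hi_IN", tgt_lang="en_XX")

Splitting on raw 500-character offsets can cut a sentence or even a word in half; chunking on sentence boundaries would likely translate better, but the sketch keeps the commit's fixed-size scheme so the two read side by side.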