Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -6,7 +6,7 @@ model_name = "Helsinki-NLP/opus-mt-en-ROMANCE"
|
|
6 |
model = MarianMTModel.from_pretrained(model_name)
|
7 |
tokenizer = MarianTokenizer.from_pretrained(model_name)
|
8 |
|
9 |
-
# Define language codes
|
10 |
language_codes = {
|
11 |
'French': 'fr',
|
12 |
'German': 'de',
|
@@ -19,8 +19,9 @@ language_codes = {
|
|
19 |
def translate_text(text, target_lang_code):
|
20 |
# Prepare the input and translate
|
21 |
inputs = tokenizer.encode(text, return_tensors="pt")
|
22 |
-
#
|
23 |
-
|
|
|
24 |
translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
|
25 |
return translated_text
|
26 |
|
|
|
6 |
model = MarianMTModel.from_pretrained(model_name)
|
7 |
tokenizer = MarianTokenizer.from_pretrained(model_name)
|
8 |
|
9 |
+
# Define target language codes directly
|
10 |
language_codes = {
|
11 |
'French': 'fr',
|
12 |
'German': 'de',
|
|
|
19 |
def translate_text(text, target_lang_code):
    """Translate *text* into the language identified by *target_lang_code*.

    Uses the module-level ``tokenizer`` and ``model`` (Marian MT).

    Args:
        text: Source sentence to translate.
        target_lang_code: Short language code such as ``'fr'``; it is wrapped
            as ``>>code<<`` and prepended to the source text.

    Returns:
        The decoded translation with special tokens stripped.
    """
    # Multilingual Marian checkpoints choose the output language from a
    # ">>code<<" token at the start of the *source* sentence, not from the
    # decoder start token. Looking up "<code>" in the vocabulary and passing
    # it as decoder_start_token_id makes generation start from the wrong
    # token, so the language request is not honoured.
    # NOTE(review): a code with no matching ">>code<<" token in the checkpoint
    # vocabulary will still not be honoured -- confirm the entries of
    # language_codes against tokenizer.supported_language_codes.
    prefixed_text = f">>{target_lang_code}<< {text}"

    # Prepare the input and translate; the model's own configured decoder
    # start token is used.
    inputs = tokenizer.encode(prefixed_text, return_tensors="pt")
    translated = model.generate(inputs)
    translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
    return translated_text
|
27 |
|