FastAPIMT

Running

TiberiuCristianLeon committed on Jul 21

Commit

49ae858

verified ·

1 Parent(s): 2130cf2

Update src/translate/Translate.py

Files changed (1) hide show

src/translate/Translate.py CHANGED Viewed

@@ -17,7 +17,7 @@ modelROMENG.to(device)
 modelENGROM.to(device)
-def paraphraseTranslateMethod(requestValue : str):
 	exception = ""
 	result_value = ""
@@ -30,33 +30,27 @@ def paraphraseTranslateMethod(requestValue : str):
 	tokenized_sent_list = sent_tokenize(requestValue)
 	for SENTENCE in tokenized_sent_list:
-		input_ids1 = tokenizerROMENG(SENTENCE, return_tensors='pt').to(device)
-		output1 = modelROMENG.generate(
-	        input_ids=input_ids1.input_ids,
-	        do_sample=True,
-	        max_length=256,
-	        top_k=90,
-	        top_p=0.97,
-	        early_stopping=False
-	    )
-		result1 = tokenizerROMENG.batch_decode(output1, skip_special_tokens=True)[0]
-		input_ids = tokenizerENGROM(result1, return_tensors='pt').to(device)
-		output = modelENGROM.generate(
-			input_ids=input_ids.input_ids,
-			do_sample=True,
-			max_length=256,
-			top_k=90,
-			top_p=0.97,
-			early_stopping=False
-		)
 		result = tokenizerENGROM.batch_decode(output, skip_special_tokens=True)[0]
-		result_value += result + " "
-	return result_value, ""

 modelENGROM.to(device)
+def paraphraseTranslateMethod(requestValue : str, model: str):
 	exception = ""
 	result_value = ""
 	tokenized_sent_list = sent_tokenize(requestValue)
 	for SENTENCE in tokenized_sent_list:
+        if model == 'roen'
+            input_ids = tokenizerROMENG(SENTENCE, return_tensors='pt').to(device)
+    		output = modelROMENG.generate(
+    	        input_ids=input_ids1.input_ids,
+    	        do_sample=True,
+    	        max_length=512,
+    	        top_k=90,
+    	        top_p=0.97,
+    	        early_stopping=False
+    	    )
+    		result = tokenizerROMENG.batch_decode(output1, skip_special_tokens=True)[0]
+        else:
+    		input_ids = tokenizerENGROM(SENTENCE, return_tensors='pt').to(device)
+    		output = modelENGROM.generate(
+    			input_ids=input_ids.input_ids,
+    			do_sample=True,
+    			max_length=512,
+    			top_k=90,
+    			top_p=0.97,
+    			early_stopping=False
+    		)
 		result = tokenizerENGROM.batch_decode(output, skip_special_tokens=True)[0]
+	return result.strip(), ""