video_translator

Sleeping

App Files Files Community

video_translator / sourcetoTarget.py

Gotenks1893

Upload 8 files

168a18b over 1 year ago

raw

history blame

2.68 kB

	# Transformers installation
	# pip install transformers

	import os
	from transformers import AutoModelForSeq2SeqLM, AutoTokenizer


	# Load the tokenizer and the seq2seq model once, at import time.
	# Both are created from the same checkpoint name, so it is spelled out a
	# single time and reused for the two from_pretrained() calls.
	_NLLB_CHECKPOINT = "facebook/nllb-200-distilled-600M"
	tokenizer = AutoTokenizer.from_pretrained(_NLLB_CHECKPOINT)
	model = AutoModelForSeq2SeqLM.from_pretrained(_NLLB_CHECKPOINT)


	# translation function for single sentence
	def translateSentence(sentence, target_lang="deu_Latn"):
	    """Translate one piece of text with the module-level tokenizer and model.

	    Args:
	        sentence: Text to translate. Its language is whatever the
	            module-level ``tokenizer`` is configured to treat as source.
	        target_lang: Language-code token forced as the first generated
	            token. Defaults to ``"deu_Latn"``, the value that was
	            previously hard-coded.

	    Returns:
	        The decoded translation with special tokens removed, or ``""`` when
	        ``sentence`` is empty or contains only whitespace.

	    Raises:
	        ValueError: If ``target_lang`` is not a token the tokenizer knows.
	    """
	    # Nothing to translate: skip the model call instead of letting it
	    # generate text for an empty prompt.
	    if not sentence or not sentence.strip():
	        return ""

	    # convert_tokens_to_ids() replaces tokenizer.lang_code_to_id[...], which
	    # is deprecated and no longer available in newer transformers releases.
	    target_token_id = tokenizer.convert_tokens_to_ids(target_lang)
	    if target_token_id == tokenizer.unk_token_id:
	        # An unknown code maps to the unknown-token id; forcing that token
	        # would not select any target language, so fail loudly instead.
	        raise ValueError(f"unknown target language code: {target_lang!r}")

	    inputs = tokenizer(sentence, return_tensors="pt")
	    translated_tokens = model.generate(
	        **inputs,
	        forced_bos_token_id=target_token_id,
	        max_length=400,
	    )
	    # generate() returns a batch; a single input yields a single output row.
	    return tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]

	def translateAllTranscripts():
	    """Translate every ``.txt`` file in ``whisper/`` into ``translatedTranscripts/``.

	    Each transcript is read as UTF-8, split on ``.``, and every non-blank
	    piece is passed to ``translateSentence``. The translated pieces are
	    joined with single spaces and written, as UTF-8, to
	    ``translatedTranscripts/<same file name>``. Files without a ``.txt``
	    suffix are reported but not processed.
	    """
	    source_dir = "whisper"
	    target_dir = "translatedTranscripts"

	    # The output folder may not exist yet (e.g. on a fresh checkout).
	    os.makedirs(target_dir, exist_ok=True)

	    # os.listdir() order is arbitrary; sorting makes runs reproducible.
	    for filename in sorted(os.listdir(source_dir)):
	        print(f'reading {filename}')
	        if not filename.endswith(".txt"):
	            continue

	        # Read explicitly as UTF-8 so the result does not depend on the
	        # platform locale and matches the encoding used for the output.
	        # NOTE(review): assumes the transcripts are UTF-8 - confirm.
	        with open(f"{source_dir}/{filename}", 'r', encoding="utf-8") as f:
	            sourceText = f.read()

	        # NOTE(review): splitting on '.' discards the periods themselves and
	        # also cuts at decimals/abbreviations; the splitting rule is kept
	        # unchanged here.
	        pieces = (part.strip() for part in sourceText.split('.'))
	        # Blank pieces (such as the one after the final period) carry no
	        # text, so they are not sent to the model.
	        translation = " ".join(translateSentence(piece) for piece in pieces if piece)

	        fname = f"{target_dir}/" + filename
	        with open(fname, "w", encoding="UTF8") as ft:
	            ft.write(translation)
	        print("written to: " + fname)


	def englishToGerman(videoId):
	    """Translate the transcript ``whisper/<videoId>.txt`` and save the result.

	    The transcript is read as UTF-8, split on ``.``, and every non-blank
	    piece is passed to ``translateSentence``. The translated pieces are
	    joined with single spaces and written, as UTF-8, to
	    ``translatedTranscripts/<videoId>.txt``.

	    Args:
	        videoId: Base name (without ``.txt``) of the transcript file.

	    Raises:
	        FileNotFoundError: If ``whisper/<videoId>.txt`` does not exist.
	    """
	    file_path = f"whisper/{videoId}.txt"
	    # Read explicitly as UTF-8 so the result does not depend on the platform
	    # locale and matches the encoding used for the output file.
	    # NOTE(review): assumes the transcript is UTF-8 - confirm.
	    with open(file_path, 'r', encoding="utf-8") as f:
	        sourceText = f.read()

	    # NOTE(review): splitting on '.' discards the periods themselves and also
	    # cuts at decimals/abbreviations; the splitting rule is kept unchanged.
	    pieces = (part.strip() for part in sourceText.split('.'))
	    # Blank pieces (such as the one after the final period) carry no text,
	    # so they are not sent to the model.
	    translation = " ".join(translateSentence(piece) for piece in pieces if piece)

	    # The output folder may not exist yet (e.g. on a fresh checkout).
	    os.makedirs("translatedTranscripts", exist_ok=True)
	    fname = f"translatedTranscripts/{videoId}.txt"
	    with open(fname, "w", encoding="UTF8") as ft:
	        ft.write(translation)
	    print("written to: " + fname)