import os

import torch
from flask import Flask, Response, render_template, request
from transformers import MarianMTModel, MarianTokenizer

import display_gloss as dg
import synonyms_preprocess as sp
from NLP_Spacy_base_translator import NlpSpacyBaseTranslator
app = Flask(__name__, static_folder='static')
app.config['TITLE'] = 'Sign Language Translate'
# Cache downloaded models under /tmp, a writable location in the Space container
cache_dir = "/tmp/huggingface"
os.makedirs(cache_dir, exist_ok=True)
os.environ['TRANSFORMERS_CACHE'] = cache_dir
os.environ['HF_HOME'] = cache_dir
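# Note: newer transformers releases read HF_HOME and deprecate TRANSFORMERS_CACHE;
# both are set above so either version resolves the cache to /tmp.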
# Force CPU usage (hide any GPUs from CUDA-aware code)
device = torch.device('cpu')
os.environ['CUDA_VISIBLE_DEVICES'] = ''
# Load the pre-trained Korean-English translation model (Marian OPUS-MT)
model_name = "Helsinki-NLP/opus-mt-ko-en"
tokenizer = MarianTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
model = MarianMTModel.from_pretrained(model_name, cache_dir=cache_dir).to(device)
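# Optional smoke test (commented out), assuming the checkpoint downloaded cleanly;
# the sample sentence and its output are illustrative, not guaranteed verbatim:
# sample = tokenizer("안녕하세요", return_tensors="pt")
# print(tokenizer.decode(model.generate(**sample)[0], skip_special_tokens=True))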
# Load the spaCy pipeline and precomputed synonym docs, plus the gloss video
# dataset and the 2000-token gloss vocabulary used for synonym matching
nlp, dict_docs_spacy = sp.load_spacy_values()
dataset, list_2000_tokens = dg.load_data()
def translate_korean_to_english(text):
    """Translate Korean text to English; return non-Korean text unchanged."""
    try:
        # Detect Hangul: U+3131-U+318F (compatibility jamo), U+AC00-U+D7A3 (syllables)
        if any('\u3131' <= char <= '\u318F' or '\uAC00' <= char <= '\uD7A3' for char in text):
            inputs = tokenizer(text, return_tensors="pt", padding=True)
            outputs = model.generate(**inputs)
            translation = tokenizer.decode(outputs[0], skip_special_tokens=True)
            print(f"Translated text: {translation}")
            return translation
        return text
    except Exception as e:
        print(f"Translation error: {e}")
        return text
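# Example usage (hypothetical outputs; exact wording depends on the checkpoint):
#   translate_korean_to_english("나는 학교에 간다")  ->  e.g. "I go to school."
#   translate_korean_to_english("hello")           ->  "hello" (unchanged: no Hangul)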
@app.route('/')
def index():
    return render_template('index.html', title=app.config['TITLE'])
@app.route('/result', methods=['POST'])
def result():
    if request.method == 'POST':
        input_text = request.form['inputSentence']
        try:
            # Korean input is translated to English; English input passes through
            english_text = translate_korean_to_english(input_text)
            # If Hangul is still present, the translation silently failed upstream
            if english_text == input_text and any('\u3131' <= char <= '\u318F' or '\uAC00' <= char <= '\uD7A3' for char in input_text):
                raise Exception("Translation failed")
            # English -> ASL gloss
            eng_to_asl_translator = NlpSpacyBaseTranslator(sentence=english_text)
            generated_gloss = eng_to_asl_translator.translate_to_gloss()
            gloss_list_lower = [gloss.lower() for gloss in generated_gloss.split() if gloss.isalnum()]
            gloss_sentence_before_synonym = " ".join(gloss_list_lower)
            # Map each gloss to its closest match in the 2000-token vocabulary
            gloss_list = [sp.find_synonyms(gloss, nlp, dict_docs_spacy, list_2000_tokens)
                          for gloss in gloss_list_lower]
            gloss_sentence_after_synonym = " ".join(gloss_list)
            return render_template('result.html',
                                   title=app.config['TITLE'],
                                   original_sentence=input_text,
                                   english_translation=english_text,
                                   gloss_sentence_before_synonym=gloss_sentence_before_synonym,
                                   gloss_sentence_after_synonym=gloss_sentence_after_synonym)
        except Exception as e:
            print(f"Error in translation process: {e}")
            return render_template('error.html', error=str(e))
@app.route('/video_feed')
def video_feed():
    # Stream the gloss videos frame by frame as an MJPEG response;
    # multipart/x-mixed-replace lets the browser swap each frame in place
    sentence = request.args.get('gloss_sentence_to_display', '')
    gloss_list = sentence.split()
    return Response(dg.generate_video(gloss_list, dataset, list_2000_tokens),
                    mimetype='multipart/x-mixed-replace; boundary=frame')
if __name__ == "__main__":
    # 7860 is the default port Hugging Face Spaces expects the app to listen on
    app.run(host="0.0.0.0", port=7860, debug=True)