Spaces:

nvidia
/

Test-Time-Translation-LLM-Demo

Runtime error

App Files Files Community

huckiyang committed on Mar 15

Commit

cb62b20

1 Parent(s): fe23ebb

naive plan2align

Browse files

Files changed (1) hide show

app.py +177 -193

app.py CHANGED Viewed

@@ -3,223 +3,207 @@ import gradio as gr
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from trl import AutoModelForCausalLMWithValueHead
-from huggingface_hub import login
-# Set your Hugging Face token as an environment variable
-# You can also use os.environ["HUGGINGFACE_TOKEN"] = "your_token_here" in your code
-# But using environment variables outside the code is more secure
-# Authenticate with Hugging Face
-login(token=os.environ.get("LA_NAME"))
-# Set device and dtype
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-torch_dtype = torch.bfloat16
-# Load models only once at startup
 print("Loading models...")
-model_id = "meta-llama/Meta-Llama-3.1-8B"  # Replace with your actual model ID
 tokenizer = AutoTokenizer.from_pretrained(model_id)
-lm_model = AutoModelForCausalLM.from_pretrained(
-    model_id,
-    torch_dtype=torch_dtype,
-    device_map="auto"
 )
-# Load the reward model - fix the offloading issue
-print("Loading reward model...")
-RM = AutoModelForCausalLMWithValueHead.from_pretrained(
-    'ray24724919/plan2align_rm',
-    torch_dtype=torch_dtype,
-    device_map={"": 0},  # Force model to stay on GPU (device 0)
-    offload_folder=None,  # Disable offloading
-)
-RM.eval()
-print("Models loaded successfully!")
-# Self-contained translation and evaluation functions
-def translate(source_text, target_language="English"):
-    """
-    Translate text from Chinese to the specified target language.
-    Args:
-        source_text (str): The Chinese text to translate
-        target_language (str): The target language for translation
-    Returns:
-        str: The translated text
-    """
-    # Format the input as per the system prompt
-    messages = [
-        {"role": "system", "content": "You are a helpful translator and only output the result."},
-        {"role": "user", "content": f"### Translate this from Chinese to {target_language}, Chinese:\n{source_text}\n### {target_language}:"}
-    ]
-    # Format messages for the model
-    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    # Tokenize the input
-    inputs = tokenizer(prompt, return_tensors="pt").to(device)
-    # Generate translation
-    with torch.no_grad():
-        outputs = lm_model.generate(
-            **inputs,
-            max_new_tokens=512,
-            temperature=0.7,
-            do_sample=True,
-            pad_token_id=tokenizer.eos_token_id
         )
-    # Decode the generated text
-    translation = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True).strip()
-    return translation
-def evaluate_translation(source_text, translation, target_language="English"):
-    """
-    Evaluate the quality of a translation using the reward model.
-    Args:
-        source_text (str): The original Chinese text
-        translation (str): The translated text
-        target_language (str): The target language of the translation
-    Returns:
-        float: The reward score
-    """
-    messages = [
-        {"role": "system", "content": "You are a helpful translator and only output the result."},
-        {"role": "user", "content": f"### Translate this from Chinese to {target_language}, Chinese:\n{source_text}\n### {target_language}:"},
-        {"role": "assistant", "content": translation}
-    ]
-    # Format messages for the reward model
-    prompt = tokenizer.apply_chat_template(messages, tokenize=False)
-    # Tokenize the input
-    inputs = tokenizer(prompt, return_tensors="pt").to(device)
-    # Get reward score
-    with torch.no_grad():
-        outputs = RM(input_ids=inputs.input_ids)
-        reward_score = outputs.value.item()
-    return reward_score
-# Combined function for the Gradio interface
-def translate_text(source_text, target_language):
-    """
-    Translate text and get reward score
-    Args:
-        source_text (str): The Chinese text to translate
-        target_language (str): The target language for translation
-    Returns:
-        tuple: (translation, reward_score)
-    """
-    if not source_text.strip():
-        return "Please enter some text to translate.", 0.0
-    try:
-        translation = translate(source_text, target_language)
-        reward_score = evaluate_translation(source_text, translation, target_language)
-        return translation, float(reward_score)
-    except Exception as e:
-        return f"Error: {str(e)}", 0.0
-# Define available target languages
-target_languages = [
-    "English", "French", "Spanish", "German", "Italian",
-    "Portuguese", "Russian", "Japanese", "Korean", "Arabic"
-]
-# Create the Gradio interface
-with gr.Blocks(title="Chinese Translation with Reward Scoring") as demo:
-    gr.Markdown("# Chinese to Any Language Translation")
-    gr.Markdown("This demo translates Chinese text to your chosen language and provides a quality score from our reward model.")
-    with gr.Row():
-        with gr.Column():
-            source_text = gr.Textbox(
-                label="Chinese Text",
-                placeholder="Enter Chinese text here...",
-                lines=5
-            )
-            target_language = gr.Dropdown(
-                choices=target_languages,
-                value="English",
-                label="Target Language"
-            )
-            translate_button = gr.Button("Translate")
-        with gr.Column():
-            translation_output = gr.Textbox(
-                label="Translation",
-                lines=5,
-                interactive=False
-            )
-            reward_score = gr.Number(
-                label="Translation Quality Score (higher is better)",
-                precision=4,
-                interactive=False
-            )
-            with gr.Row():
-                score_indicator = gr.Label(label="Quality Rating")
-    # Function to update the quality rating based on score
-    def update_quality_rating(score):
-        if score >= 0.8:
-            return "Excellent"
-        elif score >= 0.6:
-            return "Good"
-        elif score >= 0.4:
-            return "Average"
-        elif score >= 0.2:
-            return "Poor"
-        else:
-            return "Very Poor"
-    # Set up the translation flow
-    translate_outputs = translate_button.click(
-        fn=translate_text,
-        inputs=[source_text, target_language],
-        outputs=[translation_output, reward_score]
-    )
-    # Update the quality rating whenever the reward score changes
-    reward_score.change(
-        fn=update_quality_rating,
-        inputs=[reward_score],
-        outputs=[score_indicator]
-    )
-    # Examples
-    gr.Examples(
-        examples=[
-            ["你好，世界！", "English"],
-            ["我喜欢学习新的语言。", "Spanish"],
-            ["北京烤鴨很好吃。", "French"],
-            ["人工智能正在改变世界。", "German"],
-            ["今天天气真好。", "Japanese"]
-        ],
-        inputs=[source_text, target_language],
-        outputs=[translation_output, reward_score],
-        fn=translate_text
-    )
-    gr.Markdown("## How It Works")
-    gr.Markdown("""
-    1. Enter Chinese text in the input box
-    2. Select your desired target language
-    3. Click 'Translate' to get the translation
-    4. The system will display the translation and a quality score
-    The quality score is generated by a reward model trained to evaluate translation quality.
-    Higher scores indicate better translations.
-    """)
-# Launch the app
 if __name__ == "__main__":
     demo.launch()

 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from trl import AutoModelForCausalLMWithValueHead
+from safetensors.torch import load_file
+import logging
+# Set up logging
+# Configures the root logger at INFO level.
+logging.basicConfig(level=logging.INFO)
+# Constants
+# Minimum reward the best candidate must reach for rm_find_best_translation to return it.
+THRESHOLD = 2  # From Plan2Align
+# Initialize device
+# Tokenized inputs are moved to this device before generation and reward scoring.
+device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+print(f"Using device: {device}")
+# Load models once
+# The statements below run at import time; the loaded objects are module-level globals.
 print("Loading models...")
+# NOTE(review): meta-llama repositories are presumably gated, and this version no longer
+# calls huggingface_hub.login() - confirm that credentials are supplied some other way.
+model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
+# Generator model: float16 weights, module placement delegated to device_map="auto".
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    device_map="auto",
+    torch_dtype=torch.float16
 )
+class RewardModel:
+    def __init__(self, device, tokenizer, torch_dtype=torch.float16):
+        self.device = device
+        self.tokenizer = tokenizer
+        if self.tokenizer.pad_token is None:
+            self.tokenizer.pad_token = self.tokenizer.eos_token
+        # Set chat template if not already set
+        if not hasattr(self.tokenizer, 'chat_template') or self.tokenizer.chat_template is None:
+            # Using Llama 3's default chat template
+            self.tokenizer.chat_template = "<|begin_of_text|>{% for message in messages %}{{'<|start_header_id|>' + message['role'] + '<|end_header_id|>\n' + message['content'] + '<|eot_id|>'}}{% endfor %}"
+        print("Loading reward model...")
+        self.RM = AutoModelForCausalLMWithValueHead.from_pretrained(
+            "ray24724919/plan2align_rm",
+            device_map={"": 0},  # Force model to stay on GPU
+            torch_dtype=torch_dtype
         )
+        self.RM.eval()
+        print("Reward model loaded successfully!")
+    def _create_single_message(self, language, source, translation):
+        return [
+            {
+                "role": "system",
+                "content": "You are a helpful translator and only output the result."
+            },
+            {
+                "role": "user",
+                "content": f"### Translate this from Chinese to {language}, Chinese:\n{source}\n### {language}:"
+            },
+            {
+                "role": "assistant",
+                "content": translation
+            }
+        ]
+    def _process_inputs(self, messages):
+        try:
+            input_ids = self.tokenizer.apply_chat_template(
+                messages,
+                add_generation_prompt=False,
+                return_tensors="pt",
+                padding=True,
+                truncation=True
+            )
+            attention_mask = torch.ones_like(input_ids)
+            input_ids = input_ids.to(self.device)
+            attention_mask = attention_mask.to(self.device)
+            if len(input_ids.shape) == 1:
+                input_ids = input_ids.unsqueeze(0)
+                attention_mask = attention_mask.unsqueeze(0)
+            return {
+                "input_ids": input_ids,
+                "attention_mask": attention_mask
+            }
+        except Exception as e:
+            logging.error(f"Error processing inputs: {str(e)}")
+            raise
+    def reward_fn(self, language, source, translations):
+        try:
+            all_rewards = []
+            for translation in translations:
+                messages = self._create_single_message(language, source, translation)
+                inputs = self._process_inputs(messages)
+                with torch.no_grad():
+                    outputs = self.RM(**inputs, return_value=True)
+                    rewards = outputs[2]
+                reward = rewards[0, -1].cpu().item()
+                all_rewards.append(reward)
+            return all_rewards
+        except Exception as e:
+            logging.error(f"Error in reward_fn: {str(e)}")
+            raise
+    def get_len(self, language, translations):
+        try:
+            len_ = 0
+            for translation in translations:
+                l = self.tokenizer(translation, return_tensors="pt").input_ids.to(device).shape[-1]
+                len_ += l
+            return len_
+        except Exception as e:
+            logging.error(f"Error in get_len: {str(e)}")
+            raise
+# Create reward model instance with the already loaded tokenizer
+reward_model = RewardModel(device, tokenizer, torch_dtype=torch.float16)
+print("Models loaded successfully!")
+# Helper functions from Plan2Align
+def rm_predict_preference(source, translation0, translation1, language="English"):
+    translations = [translation0, translation1]
+    for t_i in range(len(translations)):
+        translations[t_i] = ''.join(translations[t_i]).replace('</s>',' ')
+    rewards = reward_model.reward_fn(language, source.replace('</s>',' '), translations)
+    best_index = rewards.index(max(rewards))
+    return best_index
+def rm_find_best_translation(source, translations, language="English"):
+    copy_translations = translations.copy()
+    if len(translations) < 2:
+        return translations[0] if translations else None
+    for t_i in range(len(translations)):
+        translations[t_i] = ''.join(translations[t_i]).replace('</s>',' ')
+    rewards = reward_model.reward_fn(language, ''.join(source).replace('</s>',' '), translations)
+    print(rewards)
+    best_index = rewards.index(max(rewards))
+    print(f"Total translations length = {len(translations)}, and best translation index is: {best_index}")
+    if rewards[best_index] >= THRESHOLD:
+        return copy_translations[best_index]
+    else:
+        return None
+def translate_chinese_to_english(chinese_text):
+    # Generate multiple translations
+    translations = []
+    # Generate three different translations with different system prompts
+    system_prompts = [
+        "You are a meticulous translator. Provide a literal, word-for-word translation that preserves the structure and meaning of each individual word.",
+        "You are a professional translator. Deliver a clear, formal, and precise translation that faithfully conveys the original meaning.",
+        "You are a creative and expressive translator. Render the text in a vivid and imaginative way, as if narrating a captivating story."
+    ]
+    for prompt in system_prompts:
+        messages = [
+            {"role": "system", "content": prompt},
+            {"role": "user", "content": f"Translate the following Chinese text to English:\n\n{chinese_text}"}
+        ]
+        inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to(device)
+        outputs = model.generate(
+            inputs,
+            max_new_tokens=512,
+            temperature=0.7,
+            top_p=0.9,
+            do_sample=True
+        )
+        translation = tokenizer.decode(outputs[0][inputs.shape[1]:], skip_special_tokens=True)
+        translations.append(translation)
+    # Use reward model to find the best translation
+    best_translation = rm_find_best_translation(chinese_text, translations)
+    if best_translation is None:
+        # If no translation meets the threshold, return the first one
+        return translations[0]
+    return best_translation
+# Gradio interface
+def process_text(text):
+    return translate_chinese_to_english(text)
+demo = gr.Interface(
+    fn=process_text,
+    inputs=gr.Textbox(lines=5, placeholder="Enter Chinese text here..."),
+    outputs=gr.Textbox(lines=5),
+    title="Chinese to English Translation with Plan2Align",
+    description="This app uses the Plan2Align approach to translate Chinese text to English."
+)
 if __name__ == "__main__":
     demo.launch()