huckiyang committed on
Commit
cb62b20
·
1 Parent(s): fe23ebb

naive plan2align

Browse files
Files changed (1) hide show
  1. app.py +177 -193
app.py CHANGED
@@ -3,223 +3,207 @@ import gradio as gr
3
  import torch
4
  from transformers import AutoTokenizer, AutoModelForCausalLM
5
  from trl import AutoModelForCausalLMWithValueHead
6
- from huggingface_hub import login
 
7
 
8
# The Hugging Face access token is read from the LA_NAME environment variable
# so that it never has to be written into the source code.
login(token=os.environ.get("LA_NAME"))

# Pick the compute device and the dtype shared by both models.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
torch_dtype = torch.bfloat16

# Everything below runs once, at import time.
print("Loading models...")
model_id = "meta-llama/Meta-Llama-3.1-8B"  # Replace with your actual model ID
tokenizer = AutoTokenizer.from_pretrained(model_id)
lm_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch_dtype,
)

# Reward model: pinned to device 0 with offloading disabled.
print("Loading reward model...")
RM = AutoModelForCausalLMWithValueHead.from_pretrained(
    'ray24724919/plan2align_rm',
    offload_folder=None,  # Disable offloading
    device_map={"": 0},  # Force model to stay on GPU (device 0)
    torch_dtype=torch_dtype,
)
RM.eval()
print("Models loaded successfully!")
39
-
40
- # Self-contained translation and evaluation functions
41
def translate(source_text, target_language="English"):
    """Translate Chinese text into ``target_language`` using the causal LM.

    Args:
        source_text (str): Chinese text to translate.
        target_language (str): Name of the language to translate into.

    Returns:
        str: The newly generated text, decoded without special tokens and
        stripped of surrounding whitespace.
    """
    system_turn = {
        "role": "system",
        "content": "You are a helpful translator and only output the result.",
    }
    user_turn = {
        "role": "user",
        "content": f"### Translate this from Chinese to {target_language}, Chinese:\n{source_text}\n### {target_language}:",
    }

    # Render the conversation to a prompt string that ends with the
    # generation prompt, then tokenize it and move it to the device.
    prompt = tokenizer.apply_chat_template(
        [system_turn, user_turn],
        tokenize=False,
        add_generation_prompt=True,
    )
    encoded = tokenizer(prompt, return_tensors="pt").to(device)

    sampling_options = {
        "max_new_tokens": 512,
        "temperature": 0.7,
        "do_sample": True,
        "pad_token_id": tokenizer.eos_token_id,
    }
    with torch.no_grad():
        generated = lm_model.generate(**encoded, **sampling_options)

    # Drop the prompt tokens so only the continuation is decoded.
    prompt_length = encoded.input_ids.shape[1]
    continuation = generated[0][prompt_length:]
    return tokenizer.decode(continuation, skip_special_tokens=True).strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
 
78
def evaluate_translation(source_text, translation, target_language="English"):
    """Score a translation with the reward model.

    The source text and the candidate translation are rendered as a
    system/user/assistant conversation and fed to the value-head model.

    Args:
        source_text (str): The original Chinese text.
        translation (str): The candidate translation to score.
        target_language (str): The language the translation is written in.

    Returns:
        float: The value-head output at the last token of the conversation.
    """
    messages = [
        {"role": "system", "content": "You are a helpful translator and only output the result."},
        {"role": "user", "content": f"### Translate this from Chinese to {target_language}, Chinese:\n{source_text}\n### {target_language}:"},
        {"role": "assistant", "content": translation},
    ]

    # Render the full conversation (no generation prompt: the assistant turn
    # is already present) and tokenize it.
    prompt = tokenizer.apply_chat_template(messages, tokenize=False)
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    with torch.no_grad():
        outputs = RM(
            input_ids=inputs.input_ids,
            attention_mask=inputs.attention_mask,
        )

    # TRL's value-head wrapper returns a plain tuple (lm_logits, loss, value),
    # not an object with a ``.value`` attribute, and ``value`` holds one entry
    # per token. The score of the whole sequence is the entry at the final
    # position of the single batch row.
    values = outputs[2]
    reward_score = values[0, -1].item()

    return reward_score
108
 
109
- # Combined function for the Gradio interface
110
def translate_text(source_text, target_language):
    """Gradio callback: translate the text and score the result.

    Args:
        source_text (str): The Chinese text to translate.
        target_language (str): The language to translate into.

    Returns:
        tuple: ``(translation, reward_score)``. Blank input yields a prompt
        message with score 0.0; any exception raised while translating or
        scoring yields ``("Error: <message>", 0.0)``.
    """
    # Guard clause: nothing to translate.
    if source_text.strip() == "":
        return "Please enter some text to translate.", 0.0

    try:
        translated = translate(source_text, target_language)
        score = evaluate_translation(source_text, translated, target_language)
        result = (translated, float(score))
    except Exception as exc:
        # Report the failure in the output box rather than crashing the UI.
        result = (f"Error: {str(exc)}", 0.0)
    return result
130
 
131
- # Define available target languages
132
# Languages offered in the target-language dropdown.
target_languages = [
    "English", "French", "Spanish", "German", "Italian",
    "Portuguese", "Russian", "Japanese", "Korean", "Arabic",
]

# Build the Gradio UI: inputs on the left, results on the right, a rating
# label underneath, followed by examples and a short explanation.
with gr.Blocks(title="Chinese Translation with Reward Scoring") as demo:
    gr.Markdown("# Chinese to Any Language Translation")
    gr.Markdown("This demo translates Chinese text to your chosen language and provides a quality score from our reward model.")

    with gr.Row():
        with gr.Column():
            source_text = gr.Textbox(
                lines=5,
                label="Chinese Text",
                placeholder="Enter Chinese text here...",
            )
            target_language = gr.Dropdown(
                label="Target Language",
                choices=target_languages,
                value="English",
            )
            translate_button = gr.Button("Translate")

        with gr.Column():
            translation_output = gr.Textbox(
                interactive=False,
                label="Translation",
                lines=5,
            )
            reward_score = gr.Number(
                interactive=False,
                label="Translation Quality Score (higher is better)",
                precision=4,
            )

    with gr.Row():
        score_indicator = gr.Label(label="Quality Rating")

    def update_quality_rating(score):
        """Map a numeric score to a coarse textual rating."""
        # Lower bounds in descending order; the first one reached wins.
        rating_floors = (
            (0.8, "Excellent"),
            (0.6, "Good"),
            (0.4, "Average"),
            (0.2, "Poor"),
        )
        for floor, rating in rating_floors:
            if score >= floor:
                return rating
        return "Very Poor"

    # Clicking the button runs translation and scoring.
    translate_outputs = translate_button.click(
        fn=translate_text,
        inputs=[source_text, target_language],
        outputs=[translation_output, reward_score],
    )

    # Whenever the score changes, refresh the textual rating.
    reward_score.change(
        fn=update_quality_rating,
        inputs=[reward_score],
        outputs=[score_indicator],
    )

    # Clickable sample inputs.
    gr.Examples(
        examples=[
            ["你好,世界!", "English"],
            ["我喜欢学习新的语言。", "Spanish"],
            ["北京烤鴨很好吃。", "French"],
            ["人工智能正在改变世界。", "German"],
            ["今天天气真好。", "Japanese"],
        ],
        fn=translate_text,
        inputs=[source_text, target_language],
        outputs=[translation_output, reward_score],
    )

    gr.Markdown("## How It Works")
    gr.Markdown("""
    1. Enter Chinese text in the input box
    2. Select your desired target language
    3. Click 'Translate' to get the translation
    4. The system will display the translation and a quality score

    The quality score is generated by a reward model trained to evaluate translation quality.
    Higher scores indicate better translations.
    """)

# Start the web server only when run as a script.
if __name__ == "__main__":
    demo.launch()
 
3
  import torch
4
  from transformers import AutoTokenizer, AutoModelForCausalLM
5
  from trl import AutoModelForCausalLMWithValueHead
6
+ from safetensors.torch import load_file
7
+ import logging
8
 
9
# Emit INFO-level (and above) log records.
logging.basicConfig(level=logging.INFO)

# Minimum reward a candidate must reach to be accepted (from Plan2Align).
THRESHOLD = 2

# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# The tokenizer and language model are loaded once, at import time.
print("Loading models...")
model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto",
)
28
 
29
class RewardModel:
    """Scores candidate translations with the Plan2Align value-head model.

    Attributes:
        device: Device that tokenized inputs are moved to before scoring.
        tokenizer: Tokenizer used to render and encode conversations. It is
            shared with the caller and may be modified by ``__init__``.
        RM: The loaded value-head model, switched to eval mode.
    """

    def __init__(self, device, tokenizer, torch_dtype=torch.float16):
        """Configure the tokenizer and load the reward model.

        Args:
            device: Device that inputs are moved to in ``_process_inputs``.
            tokenizer: An already loaded tokenizer. Its ``pad_token`` and
                ``chat_template`` are filled in when missing.
            torch_dtype: dtype passed to ``from_pretrained``.
        """
        self.device = device
        self.tokenizer = tokenizer

        # Padding is requested in _process_inputs, so a pad token must exist.
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

        # Set chat template if not already set
        if getattr(self.tokenizer, "chat_template", None) is None:
            # Using Llama 3's default chat template
            self.tokenizer.chat_template = "<|begin_of_text|>{% for message in messages %}{{'<|start_header_id|>' + message['role'] + '<|end_header_id|>\n' + message['content'] + '<|eot_id|>'}}{% endfor %}"

        print("Loading reward model...")
        # NOTE(review): the device map is hard-coded to index 0 and does not
        # follow ``device`` -- confirm this is intended for CPU-only hosts.
        self.RM = AutoModelForCausalLMWithValueHead.from_pretrained(
            "ray24724919/plan2align_rm",
            device_map={"": 0},  # Force model to stay on GPU
            torch_dtype=torch_dtype,
        )
        self.RM.eval()
        print("Reward model loaded successfully!")

    def _create_single_message(self, language, source, translation):
        """Build the system/user/assistant conversation for one candidate.

        Args:
            language: Target language name inserted into the user prompt.
            source: The Chinese source text.
            translation: The candidate translation (assistant turn).

        Returns:
            list[dict]: Three chat messages with ``role`` and ``content``.
        """
        return [
            {
                "role": "system",
                "content": "You are a helpful translator and only output the result.",
            },
            {
                "role": "user",
                "content": f"### Translate this from Chinese to {language}, Chinese:\n{source}\n### {language}:",
            },
            {
                "role": "assistant",
                "content": translation,
            },
        ]

    def _process_inputs(self, messages):
        """Tokenize a conversation into model inputs on ``self.device``.

        Args:
            messages: Chat messages as produced by ``_create_single_message``.

        Returns:
            dict: ``input_ids`` and an all-ones ``attention_mask``, both with
            a leading batch dimension.

        Raises:
            Exception: Any tokenization error is logged and re-raised.
        """
        try:
            input_ids = self.tokenizer.apply_chat_template(
                messages,
                add_generation_prompt=False,
                return_tensors="pt",
                padding=True,
                truncation=True,
            )

            # Every position is attended to.
            attention_mask = torch.ones_like(input_ids)

            input_ids = input_ids.to(self.device)
            attention_mask = attention_mask.to(self.device)

            # Add a batch dimension if the tokenizer returned a flat tensor.
            if input_ids.dim() == 1:
                input_ids = input_ids.unsqueeze(0)
                attention_mask = attention_mask.unsqueeze(0)

            return {
                "input_ids": input_ids,
                "attention_mask": attention_mask,
            }

        except Exception as e:
            logging.error(f"Error processing inputs: {str(e)}")
            raise

    def reward_fn(self, language, source, translations):
        """Score each candidate translation of ``source``.

        Args:
            language: Target language name used in the prompt.
            source: The Chinese source text.
            translations: Iterable of candidate translation strings.

        Returns:
            list[float]: One reward per candidate, in input order.

        Raises:
            Exception: Any scoring error is logged and re-raised.
        """
        try:
            all_rewards = []
            for translation in translations:
                messages = self._create_single_message(language, source, translation)
                inputs = self._process_inputs(messages)
                with torch.no_grad():
                    outputs = self.RM(**inputs, return_value=True)
                # Element 2 of the model output is read at [0, -1], i.e. the
                # first batch row's final position, as the candidate's reward.
                values = outputs[2]
                all_rewards.append(values[0, -1].cpu().item())
            return all_rewards
        except Exception as e:
            logging.error(f"Error in reward_fn: {str(e)}")
            raise

    def get_len(self, language, translations):
        """Return the total token count of all ``translations``.

        Args:
            language: Unused; kept so the signature stays unchanged.
            translations: Iterable of strings to tokenize.

        Returns:
            int: Sum of each string's tokenized length (0 for no strings).

        Raises:
            Exception: Any tokenization error is logged and re-raised.
        """
        try:
            # Only the shape is needed, so the ids are not moved to a device;
            # this also avoids depending on a module-level ``device`` name.
            return sum(
                self.tokenizer(translation, return_tensors="pt").input_ids.shape[-1]
                for translation in translations
            )
        except Exception as e:
            logging.error(f"Error in get_len: {str(e)}")
            raise
121
+
122
# Build the single shared reward model, reusing the tokenizer loaded above.
reward_model = RewardModel(
    device=device,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
)
print("Models loaded successfully!")
125
+
126
+ # Helper functions from Plan2Align
127
def rm_predict_preference(source, translation0, translation1, language="English"):
    """Return which of two translations the reward model scores higher.

    Each candidate is joined into one string and every ``</s>`` marker in the
    candidates and in ``source`` is replaced by a space before scoring.

    Returns:
        int: 0 if ``translation0`` scores at least as high, otherwise 1.
    """
    candidates = [
        ''.join(candidate).replace('</s>', ' ')
        for candidate in (translation0, translation1)
    ]
    cleaned_source = source.replace('</s>', ' ')
    scores = reward_model.reward_fn(language, cleaned_source, candidates)
    # list.index returns the first maximum, so ties go to translation0.
    return scores.index(max(scores))
134
 
135
def rm_find_best_translation(source, translations, language="English"):
    """Pick the highest-scoring translation if it reaches ``THRESHOLD``.

    Args:
        source: The source text (a string or a sequence of string pieces).
        translations: Candidate translations. This list is not modified.
        language: Target language name used in the reward prompt.

    Returns:
        The original, unmodified best candidate when its reward is at least
        ``THRESHOLD``; ``None`` when no candidate reaches the threshold or
        when ``translations`` is empty. A single candidate is returned as-is
        without being scored.
    """
    # Nothing to compare: return the lone candidate, or None if there is none.
    if len(translations) < 2:
        return translations[0] if translations else None

    # Score cleaned copies so the caller's list is left untouched.
    cleaned = [''.join(candidate).replace('</s>', ' ') for candidate in translations]
    cleaned_source = ''.join(source).replace('</s>', ' ')

    rewards = reward_model.reward_fn(language, cleaned_source, cleaned)

    print(rewards)

    best_index = rewards.index(max(rewards))

    print(f"Total translations length = {len(translations)}, and best translation index is: {best_index}")

    if rewards[best_index] >= THRESHOLD:
        return translations[best_index]
    return None
 
156
 
157
def translate_chinese_to_english(chinese_text):
    """Translate Chinese text to English, choosing among three candidates.

    One candidate is sampled per system prompt (literal, formal, creative).
    The reward model then selects the best one; if none is accepted, the
    first candidate is returned.

    Args:
        chinese_text (str): The Chinese text to translate.

    Returns:
        str: The selected English translation.
    """
    # One system prompt per translation style.
    system_prompts = [
        "You are a meticulous translator. Provide a literal, word-for-word translation that preserves the structure and meaning of each individual word.",
        "You are a professional translator. Deliver a clear, formal, and precise translation that faithfully conveys the original meaning.",
        "You are a creative and expressive translator. Render the text in a vivid and imaginative way, as if narrating a captivating story.",
    ]

    translations = []
    for prompt in system_prompts:
        messages = [
            {"role": "system", "content": prompt},
            {"role": "user", "content": f"Translate the following Chinese text to English:\n\n{chinese_text}"},
        ]

        # add_generation_prompt=True appends the assistant header so the
        # model answers as the assistant instead of continuing the user turn.
        inputs = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt",
        ).to(device)

        # Inference only: no gradients are needed.
        with torch.no_grad():
            outputs = model.generate(
                inputs,
                max_new_tokens=512,
                temperature=0.7,
                top_p=0.9,
                do_sample=True,
            )

        # Decode only the tokens generated after the prompt.
        translation = tokenizer.decode(outputs[0][inputs.shape[1]:], skip_special_tokens=True)
        translations.append(translation)

    # Use reward model to find the best translation
    best_translation = rm_find_best_translation(chinese_text, translations)

    if best_translation is None:
        # If no translation meets the threshold, return the first one
        return translations[0]

    return best_translation
195
+
196
+ # Gradio interface
197
def process_text(text):
    """Gradio callback: return the English translation of ``text``."""
    english = translate_chinese_to_english(text)
    return english
199
+
200
# Single-textbox interface: Chinese text in, English translation out.
demo = gr.Interface(
    title="Chinese to English Translation with Plan2Align",
    description="This app uses the Plan2Align approach to translate Chinese text to English.",
    fn=process_text,
    inputs=gr.Textbox(placeholder="Enter Chinese text here...", lines=5),
    outputs=gr.Textbox(lines=5),
)

# Start the web server only when run as a script.
if __name__ == "__main__":
    demo.launch()