baconnier committed
Commit 623abec · verified · 1 Parent(s): caca082

Update app.py

Files changed (1): app.py (+17, −43)
app.py CHANGED
@@ -13,7 +13,6 @@ import torch
 import numpy as np
 from PIL import Image
 import cv2
-import translators as ts
 
 from transformers import (
     AutoModelForCausalLM,
@@ -100,22 +99,8 @@ ENABLE_CPU_OFFLOAD = os.getenv("ENABLE_CPU_OFFLOAD", "0") == "1"
 
 dtype = torch.float16 if device.type == "cuda" else torch.float32
 
-# NAPOLEON 4B MULTIMODAL MODEL - Replace Gemma3 with Napoleon
-napoleon_model_id = "baconnier/Napoleon_4B_V0.0"
-napoleon_model = AutoModelForCausalLM.from_pretrained(
-    napoleon_model_id, device_map="auto", torch_dtype=torch.bfloat16
-).eval()
-napoleon_processor = AutoProcessor.from_pretrained(napoleon_model_id)
-
-# Translation helper
-def translate_text(text, target_lang="fr", source_lang="auto"):
-    try:
-        return ts.deepl(text, from_language=source_lang, to_language=target_lang)
-    except:
-        try:
-            return ts.google(text, from_language=source_lang, to_language=target_lang)
-        except:
-            return text  # Return the original text on failure
+# NAPOLEON 4B MULTIMODAL MODEL - For image and video processing
+napoleon_processor = AutoProcessor.from_pretrained(model_id)
 
 # VIDEO PROCESSING HELPER
 def downsample_video(video_path):
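
The added line above reuses a single checkpoint via `model_id` instead of loading a second Napoleon model just for this path. The diff does not show where `model_id`, `model`, and `tokenizer` are defined; below is a minimal sketch of the shared setup they presumably refer to, reusing the checkpoint name and loading arguments from the removed lines (all names here are assumptions, not part of this commit):

import torch
from transformers import AutoModelForCausalLM, AutoProcessor, AutoTokenizer

# Assumed shared setup elsewhere in app.py (not shown in this diff).
model_id = "baconnier/Napoleon_4B_V0.0"              # taken from the removed napoleon_model_id
tokenizer = AutoTokenizer.from_pretrained(model_id)
napoleon_processor = AutoProcessor.from_pretrained(model_id)   # same call as the added line
model = AutoModelForCausalLM.from_pretrained(
    model_id, device_map="auto", torch_dtype=torch.bfloat16    # same arguments as the removed loader
).eval()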
@@ -158,9 +143,6 @@ def generate(
         # Remove the napoleon flag from the prompt.
         prompt_clean = re.sub(r"@napoleon", "", text, flags=re.IGNORECASE).strip().strip('"')
 
-        # Translate to French if the text is not already in French
-        prompt_clean_fr = translate_text(prompt_clean, target_lang="fr")
-
         if files:
             # If image files are provided, load them.
             images = [load_image(f) for f in files]
@@ -168,26 +150,26 @@ def generate(
                 "role": "user",
                 "content": [
                     *[{"type": "image", "image": image} for image in images],
-                    {"type": "text", "text": prompt_clean_fr},
+                    {"type": "text", "text": prompt_clean},
                 ]
             }]
         else:
             messages = [
                 {"role": "system", "content": [{"type": "text", "text": "Vous êtes un assistant utile qui parle français."}]},
-                {"role": "user", "content": [{"type": "text", "text": prompt_clean_fr}]}
+                {"role": "user", "content": [{"type": "text", "text": prompt_clean}]}
             ]
 
-        inputs = napoleon_processor.apply_chat_template(
+        inputs = tokenizer.apply_chat_template(
             messages, add_generation_prompt=True, tokenize=True,
             return_dict=True, return_tensors="pt"
-        ).to(napoleon_model.device, dtype=torch.bfloat16)
+        ).to(model.device, dtype=torch.bfloat16)
 
         streamer = TextIteratorStreamer(
-            napoleon_processor.tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True
+            tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True
         )
 
         generation_kwargs = {
-            **inputs,
+            "input_ids": inputs,
             "streamer": streamer,
             "max_new_tokens": max_new_tokens,
             "do_sample": True,
@@ -197,7 +179,7 @@ def generate(
             "repetition_penalty": repetition_penalty,
         }
 
-        thread = Thread(target=napoleon_model.generate, kwargs=generation_kwargs)
+        thread = Thread(target=model.generate, kwargs=generation_kwargs)
         thread.start()
 
         buffer = ""
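
This image branch follows the standard transformers streaming pattern: build a chat-templated batch, attach a TextIteratorStreamer, run generate on a background thread, and yield the growing buffer. A minimal, self-contained sketch of that pattern (function and variable names are placeholders; note that apply_chat_template(..., return_dict=True) returns a mapping, which this sketch unpacks with ** into the generate kwargs):

from threading import Thread
from transformers import TextIteratorStreamer

def stream_reply(model, tokenizer, messages, max_new_tokens=256):
    # Tokenize the chat-formatted messages; return_dict=True gives input_ids and attention_mask.
    inputs = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, tokenize=True,
        return_dict=True, return_tensors="pt",
    ).to(model.device)

    streamer = TextIteratorStreamer(
        tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True
    )
    # generate() blocks, so it runs on a worker thread while we consume the streamer here.
    Thread(target=model.generate,
           kwargs={**inputs, "streamer": streamer, "max_new_tokens": max_new_tokens}).start()

    buffer = ""
    for chunk in streamer:  # yields decoded text pieces as they are produced
        buffer += chunk
        yield buffer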
@@ -213,9 +195,6 @@ def generate(
         # Remove the video flag from the prompt.
         prompt_clean = re.sub(r"@video", "", text, flags=re.IGNORECASE).strip().strip('"')
 
-        # Translate to French if the text is not already in French
-        prompt_clean_fr = translate_text(prompt_clean, target_lang="fr")
-
         if files:
             # Assume the first file is a video.
             video_path = files[0]
@@ -223,7 +202,7 @@ def generate(
 
             messages = [
                 {"role": "system", "content": [{"type": "text", "text": "Vous êtes un assistant utile qui parle français."}]},
-                {"role": "user", "content": [{"type": "text", "text": prompt_clean_fr}]}
+                {"role": "user", "content": [{"type": "text", "text": prompt_clean}]}
             ]
 
             # Append each frame as an image with a timestamp label.
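
The frames appended here come from downsample_video, which this commit leaves unchanged and which is not shown in full. A rough sketch of what such a helper typically looks like with cv2 and PIL, assuming it samples a fixed number of evenly spaced frames and returns (image, timestamp) pairs (the actual implementation in app.py may differ):

import cv2
import numpy as np
from PIL import Image

def downsample_video(video_path, num_frames=10):
    # Sample evenly spaced frames; return a list of (PIL.Image, timestamp_in_seconds) pairs.
    vidcap = cv2.VideoCapture(video_path)
    total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = vidcap.get(cv2.CAP_PROP_FPS) or 30.0
    frames = []
    for idx in np.linspace(0, max(total_frames - 1, 0), num_frames, dtype=int):
        vidcap.set(cv2.CAP_PROP_POS_FRAMES, int(idx))
        ok, frame = vidcap.read()
        if not ok:
            continue
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # cv2 decodes as BGR; PIL expects RGB
        frames.append((Image.fromarray(frame), round(idx / fps, 2)))
    vidcap.release()
    return frames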
@@ -236,20 +215,20 @@ def generate(
         else:
             messages = [
                 {"role": "system", "content": [{"type": "text", "text": "Vous êtes un assistant utile qui parle français."}]},
-                {"role": "user", "content": [{"type": "text", "text": prompt_clean_fr}]}
+                {"role": "user", "content": [{"type": "text", "text": prompt_clean}]}
             ]
 
-        inputs = napoleon_processor.apply_chat_template(
+        inputs = tokenizer.apply_chat_template(
             messages, add_generation_prompt=True, tokenize=True,
             return_dict=True, return_tensors="pt"
-        ).to(napoleon_model.device, dtype=torch.bfloat16)
+        ).to(model.device, dtype=torch.bfloat16)
 
         streamer = TextIteratorStreamer(
-            napoleon_processor.tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True
+            tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True
         )
 
         generation_kwargs = {
-            **inputs,
+            "input_ids": inputs,
             "streamer": streamer,
             "max_new_tokens": max_new_tokens,
             "do_sample": True,
@@ -259,7 +238,7 @@ def generate(
             "repetition_penalty": repetition_penalty,
         }
 
-        thread = Thread(target=napoleon_model.generate, kwargs=generation_kwargs)
+        thread = Thread(target=model.generate, kwargs=generation_kwargs)
         thread.start()
 
         buffer = ""
@@ -298,12 +277,7 @@ def generate(
             time.sleep(0.01)
             yield buffer
     else:
-        # Translate the text into French for Napoleon
-        text_fr = translate_text(text, target_lang="fr")
-        conversation_fr = clean_chat_history(chat_history)
-        conversation_fr.append({"role": "user", "content": text_fr})
-
-        input_ids = tokenizer.apply_chat_template(conversation_fr, add_generation_prompt=True, return_tensors="pt")
+        input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
         if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
             input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
             gr.Warning(f"Texte d'entrée tronqué car plus long que {MAX_INPUT_TOKEN_LENGTH} tokens.")
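
In this plain-text branch the removed lines built the conversation (and translated it) inline; the retained line assumes a conversation list is assembled earlier in generate from the cleaned chat history plus the new user turn. A sketch of that assumed construction, reusing the clean_chat_history helper referenced by the removed lines:

# Assumed to happen earlier in generate(), before the apply_chat_template call above.
conversation = clean_chat_history(chat_history)
conversation.append({"role": "user", "content": text})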
 