Update app.py
app.py CHANGED
@@ -13,7 +13,6 @@ import torch
 import numpy as np
 from PIL import Image
 import cv2
-import translators as ts
 
 from transformers import (
     AutoModelForCausalLM,
@@ -100,22 +99,8 @@ ENABLE_CPU_OFFLOAD = os.getenv("ENABLE_CPU_OFFLOAD", "0") == "1"
 
 dtype = torch.float16 if device.type == "cuda" else torch.float32
 
-# NAPOLEON 4B MULTIMODAL MODEL -
-
-napoleon_model = AutoModelForCausalLM.from_pretrained(
-    napoleon_model_id, device_map="auto", torch_dtype=torch.bfloat16
-).eval()
-napoleon_processor = AutoProcessor.from_pretrained(napoleon_model_id)
-
-# Fonction de traduction
-def translate_text(text, target_lang="fr", source_lang="auto"):
-    try:
-        return ts.deepl(text, from_language=source_lang, to_language=target_lang)
-    except:
-        try:
-            return ts.google(text, from_language=source_lang, to_language=target_lang)
-        except:
-            return text  # Retourner le texte original en cas d'échec
+# NAPOLEON 4B MULTIMODAL MODEL - Pour le traitement des images et vidéos
+napoleon_processor = AutoProcessor.from_pretrained(model_id)
 
 # VIDEO PROCESSING HELPER
 def downsample_video(video_path):
@@ -158,9 +143,6 @@ def generate(
         # Remove the napoleon flag from the prompt.
         prompt_clean = re.sub(r"@napoleon", "", text, flags=re.IGNORECASE).strip().strip('"')
 
-        # Traduire en français si le texte n'est pas déjà en français
-        prompt_clean_fr = translate_text(prompt_clean, target_lang="fr")
-
         if files:
             # If image files are provided, load them.
             images = [load_image(f) for f in files]
@@ -168,26 +150,26 @@ def generate(
                 "role": "user",
                 "content": [
                     *[{"type": "image", "image": image} for image in images],
-                    {"type": "text", "text":
+                    {"type": "text", "text": prompt_clean},
                 ]
             }]
         else:
             messages = [
                 {"role": "system", "content": [{"type": "text", "text": "Vous êtes un assistant utile qui parle français."}]},
-                {"role": "user", "content": [{"type": "text", "text":
+                {"role": "user", "content": [{"type": "text", "text": prompt_clean}]}
             ]
 
-        inputs =
+        inputs = tokenizer.apply_chat_template(
             messages, add_generation_prompt=True, tokenize=True,
             return_dict=True, return_tensors="pt"
-        ).to(
+        ).to(model.device, dtype=torch.bfloat16)
 
         streamer = TextIteratorStreamer(
-
+            tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True
         )
 
         generation_kwargs = {
-
+            "input_ids": inputs,
             "streamer": streamer,
             "max_new_tokens": max_new_tokens,
             "do_sample": True,
@@ -197,7 +179,7 @@ def generate(
             "repetition_penalty": repetition_penalty,
         }
 
-        thread = Thread(target=
+        thread = Thread(target=model.generate, kwargs=generation_kwargs)
         thread.start()
 
         buffer = ""
@@ -213,9 +195,6 @@ def generate(
         # Remove the video flag from the prompt.
        prompt_clean = re.sub(r"@video", "", text, flags=re.IGNORECASE).strip().strip('"')
 
-        # Traduire en français si le texte n'est pas déjà en français
-        prompt_clean_fr = translate_text(prompt_clean, target_lang="fr")
-
         if files:
             # Assume the first file is a video.
             video_path = files[0]
@@ -223,7 +202,7 @@ def generate(
 
             messages = [
                 {"role": "system", "content": [{"type": "text", "text": "Vous êtes un assistant utile qui parle français."}]},
-                {"role": "user", "content": [{"type": "text", "text":
+                {"role": "user", "content": [{"type": "text", "text": prompt_clean}]}
             ]
 
             # Append each frame as an image with a timestamp label.
@@ -236,20 +215,20 @@ def generate(
         else:
             messages = [
                 {"role": "system", "content": [{"type": "text", "text": "Vous êtes un assistant utile qui parle français."}]},
-                {"role": "user", "content": [{"type": "text", "text":
+                {"role": "user", "content": [{"type": "text", "text": prompt_clean}]}
             ]
 
-        inputs =
+        inputs = tokenizer.apply_chat_template(
             messages, add_generation_prompt=True, tokenize=True,
             return_dict=True, return_tensors="pt"
-        ).to(
+        ).to(model.device, dtype=torch.bfloat16)
 
         streamer = TextIteratorStreamer(
-
+            tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True
        )
 
         generation_kwargs = {
-
+            "input_ids": inputs,
             "streamer": streamer,
             "max_new_tokens": max_new_tokens,
             "do_sample": True,
@@ -259,7 +238,7 @@ def generate(
             "repetition_penalty": repetition_penalty,
         }
 
-        thread = Thread(target=
+        thread = Thread(target=model.generate, kwargs=generation_kwargs)
         thread.start()
 
         buffer = ""
@@ -298,12 +277,7 @@ def generate(
             time.sleep(0.01)
             yield buffer
     else:
-
-        text_fr = translate_text(text, target_lang="fr")
-        conversation_fr = clean_chat_history(chat_history)
-        conversation_fr.append({"role": "user", "content": text_fr})
-
-        input_ids = tokenizer.apply_chat_template(conversation_fr, add_generation_prompt=True, return_tensors="pt")
+        input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
         if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
             input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
             gr.Warning(f"Texte d'entrée tronqué car plus long que {MAX_INPUT_TOKEN_LENGTH} tokens.")
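
The updated code leans on the standard Transformers threaded-streaming pattern: apply_chat_template builds and tokenizes the prompt, model.generate runs in a background Thread, and a TextIteratorStreamer yields decoded text chunks that the Gradio handler re-yields into the chat UI. The sketch below shows that pattern in isolation, assuming a text-only chat model; the checkpoint name is only a placeholder, and the Space's multimodal AutoProcessor-style message format (image/text content parts) is not reproduced here.

# Minimal sketch of the threaded streaming pattern used in app.py.
# "HuggingFaceTB/SmolLM2-135M-Instruct" is a placeholder checkpoint, not the
# model this Space actually loads; any chat-tuned causal LM works the same way.
from threading import Thread

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_id = "HuggingFaceTB/SmolLM2-135M-Instruct"  # placeholder
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float32).eval()

messages = [
    {"role": "system", "content": "Vous êtes un assistant utile qui parle français."},
    {"role": "user", "content": "Qui était Napoléon Bonaparte ?"},
]

# apply_chat_template builds the prompt from the chat messages and tokenizes it.
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

# The streamer yields decoded text chunks as generate() produces tokens,
# so generation has to run in a background thread.
streamer = TextIteratorStreamer(
    tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True
)
generation_kwargs = {
    "input_ids": input_ids,
    "streamer": streamer,
    "max_new_tokens": 128,
    "do_sample": False,
}
Thread(target=model.generate, kwargs=generation_kwargs).start()

buffer = ""
for new_text in streamer:
    buffer += new_text  # in the Gradio app, each partial buffer is yielded to the UI
print(buffer)

Calling model.generate on the main thread would block until the full completion is ready; the thread plus streamer combination is what makes the incremental yield-based updates in the Gradio chat handler possible.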