Show ffmpeg command in case of error

#6
by Aivo - opened
Files changed (2) hide show
  1. app.py +37 -93
  2. requirements.txt +1 -1
app.py CHANGED
@@ -12,28 +12,13 @@ import tempfile
12
  import shlex
13
  import shutil
14
 
15
- # Supported models configuration
16
- MODELS = {
17
- "deepseek-ai/DeepSeek-V3": {
18
- "base_url": "https://api.deepseek.com/v1",
19
- "env_key": "DEEPSEEK_API_KEY",
20
- },
21
- "Qwen/Qwen2.5-Coder-32B-Instruct": {
22
- "base_url": "https://api-inference.huggingface.co/v1/",
23
- "env_key": "HF_TOKEN",
24
- },
25
- }
26
-
27
- # Initialize client with first available model
28
- client = OpenAI(
29
- base_url=next(iter(MODELS.values()))["base_url"],
30
- api_key=os.environ[next(iter(MODELS.values()))["env_key"]],
31
- )
32
 
33
  allowed_medias = [
34
  ".png",
35
  ".jpg",
36
- ".webp",
37
  ".jpeg",
38
  ".tiff",
39
  ".bmp",
@@ -99,7 +84,7 @@ def get_files_infos(files):
99
  return results
100
 
101
 
102
- def get_completion(prompt, files_info, top_p, temperature, model_choice):
103
  # Create table header
104
  files_info_string = "| Type | Name | Dimensions | Duration | Audio Channels |\n"
105
  files_info_string += "|------|------|------------|-----------|--------|\n"
@@ -132,7 +117,6 @@ You are given:
132
  Your objective is to generate the SIMPLEST POSSIBLE single ffmpeg command to create the requested video.
133
 
134
  Key requirements:
135
- - First, think step-by-step about what the user is asking for and reformulate it into a clear technical specification
136
  - Use the absolute minimum number of ffmpeg options needed
137
  - Avoid complex filter chains or filter_complex if possible
138
  - Prefer simple concatenation, scaling, and basic filters
@@ -148,19 +132,15 @@ Remember: Simpler is better. Only use advanced ffmpeg features if absolutely nec
148
  },
149
  {
150
  "role": "user",
151
- "content": f"""Always output the media as video/mp4 and output file with "output.mp4".
152
- The current assets and objective follow.
153
 
154
  AVAILABLE ASSETS LIST:
155
 
156
  {files_info_string}
157
 
158
  OBJECTIVE: {prompt} and output at "output.mp4"
159
-
160
- First, think step-by-step about what I'm asking for and reformulate it into a clear technical specification.
161
- Then provide the FFMPEG command that will accomplish this task.
162
-
163
- YOUR RESPONSE:
164
  """,
165
  },
166
  ]
@@ -172,16 +152,8 @@ YOUR RESPONSE:
172
  print(msg["content"])
173
  print("=====================\n")
174
 
175
- if model_choice not in MODELS:
176
- raise ValueError(f"Model {model_choice} is not supported")
177
-
178
- model_config = MODELS[model_choice]
179
- client.base_url = model_config["base_url"]
180
- client.api_key = os.environ[model_config["env_key"]]
181
- model = "deepseek-chat" if "deepseek" in model_choice.lower() else model_choice
182
-
183
  completion = client.chat.completions.create(
184
- model=model,
185
  messages=messages,
186
  temperature=temperature,
187
  top_p=top_p,
@@ -193,23 +165,13 @@ YOUR RESPONSE:
193
  # Find content between ```sh or ```bash and the next ```
194
  import re
195
 
196
- command_match = re.search(r"```(?:sh|bash)?\n(.*?)\n```", content, re.DOTALL)
197
- if command_match:
198
- command = command_match.group(1).strip()
199
- else:
200
- # Try to find a line that starts with ffmpeg
201
- ffmpeg_lines = [line.strip() for line in content.split('\n') if line.strip().startswith('ffmpeg')]
202
- if ffmpeg_lines:
203
- command = ffmpeg_lines[0]
204
- else:
205
- command = content.replace("\n", "")
206
- else:
207
- # Try to find a line that starts with ffmpeg
208
- ffmpeg_lines = [line.strip() for line in content.split('\n') if line.strip().startswith('ffmpeg')]
209
- if ffmpeg_lines:
210
- command = ffmpeg_lines[0]
211
  else:
212
  command = content.replace("\n", "")
 
 
213
 
214
  # remove output.mp4 with the actual output file path
215
  command = command.replace("output.mp4", "")
@@ -219,13 +181,7 @@ YOUR RESPONSE:
219
  raise Exception("API Error")
220
 
221
 
222
- def update(
223
- files,
224
- prompt,
225
- top_p=1,
226
- temperature=1,
227
- model_choice="Qwen/Qwen2.5-Coder-32B-Instruct",
228
- ):
229
  if prompt == "":
230
  raise gr.Error("Please enter a prompt.")
231
 
@@ -237,16 +193,14 @@ def update(
237
  raise gr.Error(
238
  "Please make sure all videos are less than 2 minute long."
239
  )
240
- if file_info["size"] > 100000000:
241
- raise gr.Error("Please make sure all files are less than 100MB in size.")
242
 
243
  attempts = 0
244
  while attempts < 2:
245
  print("ATTEMPT", attempts)
246
  try:
247
- command_string = get_completion(
248
- prompt, files_info, top_p, temperature, model_choice
249
- )
250
  print(
251
  f"""///PROMTP {prompt} \n\n/// START OF COMMAND ///:\n\n{command_string}\n\n/// END OF COMMAND ///\n\n"""
252
  )
@@ -285,9 +239,7 @@ def update(
285
  f"\n=== EXECUTING FFMPEG COMMAND ===\nffmpeg {' '.join(final_command[1:])}\n"
286
  )
287
  subprocess.run(final_command, cwd=temp_dir)
288
- # Extract just the command for display
289
- command_for_display = f"ffmpeg {' '.join(args[1:])} -y output.mp4"
290
- generated_command = f"### Generated Command\n```bash\n{command_for_display}\n```"
291
  return output_file_path, gr.update(value=generated_command)
292
  except Exception as e:
293
  attempts += 1
@@ -300,7 +252,7 @@ with gr.Blocks() as demo:
300
  gr.Markdown(
301
  """
302
  # 🏞 AI Video Composer
303
- Compose new videos from your assets using natural language. Add video, image and audio assets and let [Qwen2.5-Coder](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct) or [DeepSeek-V3](https://huggingface.co/deepseek-ai/DeepSeek-V3-Base) generate a new video for you (using FFMPEG).
304
  """,
305
  elem_id="header",
306
  )
@@ -312,16 +264,11 @@ with gr.Blocks() as demo:
312
  file_types=allowed_medias,
313
  )
314
  user_prompt = gr.Textbox(
315
- placeholder="eg: Remove the 3 first seconds of the video",
316
  label="Instructions",
317
  )
318
  btn = gr.Button("Run")
319
  with gr.Accordion("Parameters", open=False):
320
- model_choice = gr.Radio(
321
- choices=list(MODELS.keys()),
322
- value=list(MODELS.keys())[0],
323
- label="Model",
324
- )
325
  top_p = gr.Slider(
326
  minimum=-0,
327
  maximum=1.0,
@@ -346,7 +293,7 @@ with gr.Blocks() as demo:
346
 
347
  btn.click(
348
  fn=update,
349
- inputs=[user_files, user_prompt, top_p, temperature, model_choice],
350
  outputs=[generated_video, generated_command],
351
  )
352
  with gr.Row():
@@ -357,40 +304,37 @@ with gr.Blocks() as demo:
357
  "Use the image as the background with a waveform visualization for the audio positioned in center of the video.",
358
  0.7,
359
  0.1,
360
- (
361
- list(MODELS.keys())[1]
362
- if len(MODELS) > 1
363
- else list(MODELS.keys())[0]
364
- ),
365
- ],
366
- [
367
- ["./examples/ai_talk.wav", "./examples/bg-image.png"],
368
- "Use the image as the background with a waveform visualization for the audio positioned in center of the video. Make sure the waveform has a max height of 250 pixels.",
369
- 0.7,
370
- 0.1,
371
- list(MODELS.keys())[0],
372
  ],
373
  [
374
  [
 
375
  "./examples/cat1.jpeg",
376
  "./examples/cat2.jpeg",
377
  "./examples/cat3.jpeg",
378
  "./examples/cat4.jpeg",
379
  "./examples/cat5.jpeg",
380
  "./examples/cat6.jpeg",
 
381
  "./examples/heat-wave.mp3",
382
  ],
383
- "Create a 3x2 grid of the cat images with the audio as background music. Make the video duration match the audio duration.",
 
 
 
 
 
 
 
 
 
 
 
 
384
  0.7,
385
  0.1,
386
- (
387
- list(MODELS.keys())[1]
388
- if len(MODELS) > 1
389
- else list(MODELS.keys())[0]
390
- ),
391
  ],
392
  ],
393
- inputs=[user_files, user_prompt, top_p, temperature, model_choice],
394
  outputs=[generated_video, generated_command],
395
  fn=update,
396
  run_on_click=True,
 
12
  import shlex
13
  import shutil
14
 
15
+ HF_API_KEY = os.environ["HF_TOKEN"]
16
+
17
+ client = OpenAI(base_url="https://api-inference.huggingface.co/v1/", api_key=HF_API_KEY)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  allowed_medias = [
20
  ".png",
21
  ".jpg",
 
22
  ".jpeg",
23
  ".tiff",
24
  ".bmp",
 
84
  return results
85
 
86
 
87
+ def get_completion(prompt, files_info, top_p, temperature):
88
  # Create table header
89
  files_info_string = "| Type | Name | Dimensions | Duration | Audio Channels |\n"
90
  files_info_string += "|------|------|------------|-----------|--------|\n"
 
117
  Your objective is to generate the SIMPLEST POSSIBLE single ffmpeg command to create the requested video.
118
 
119
  Key requirements:
 
120
  - Use the absolute minimum number of ffmpeg options needed
121
  - Avoid complex filter chains or filter_complex if possible
122
  - Prefer simple concatenation, scaling, and basic filters
 
132
  },
133
  {
134
  "role": "user",
135
+ "content": f"""Always output the media as video/mp4 and output file with "output.mp4". Provide only the shell command without any explanations.
136
+ The current assets and objective follow. Reply with the FFMPEG command:
137
 
138
  AVAILABLE ASSETS LIST:
139
 
140
  {files_info_string}
141
 
142
  OBJECTIVE: {prompt} and output at "output.mp4"
143
+ YOUR FFMPEG COMMAND:
 
 
 
 
144
  """,
145
  },
146
  ]
 
152
  print(msg["content"])
153
  print("=====================\n")
154
 
 
 
 
 
 
 
 
 
155
  completion = client.chat.completions.create(
156
+ model="Qwen/Qwen2.5-Coder-32B-Instruct",
157
  messages=messages,
158
  temperature=temperature,
159
  top_p=top_p,
 
165
  # Find content between ```sh or ```bash and the next ```
166
  import re
167
 
168
+ command = re.search(r"```(?:sh|bash)?\n(.*?)\n```", content, re.DOTALL)
169
+ if command:
170
+ command = command.group(1).strip()
 
 
 
 
 
 
 
 
 
 
 
 
171
  else:
172
  command = content.replace("\n", "")
173
+ else:
174
+ command = content.replace("\n", "")
175
 
176
  # remove output.mp4 with the actual output file path
177
  command = command.replace("output.mp4", "")
 
181
  raise Exception("API Error")
182
 
183
 
184
+ def update(files, prompt, top_p=1, temperature=1):
 
 
 
 
 
 
185
  if prompt == "":
186
  raise gr.Error("Please enter a prompt.")
187
 
 
193
  raise gr.Error(
194
  "Please make sure all videos are less than 2 minute long."
195
  )
196
+ if file_info["size"] > 10000000:
197
+ raise gr.Error("Please make sure all files are less than 10MB in size.")
198
 
199
  attempts = 0
200
  while attempts < 2:
201
  print("ATTEMPT", attempts)
202
  try:
203
+ command_string = get_completion(prompt, files_info, top_p, temperature)
 
 
204
  print(
205
  f"""///PROMTP {prompt} \n\n/// START OF COMMAND ///:\n\n{command_string}\n\n/// END OF COMMAND ///\n\n"""
206
  )
 
239
  f"\n=== EXECUTING FFMPEG COMMAND ===\nffmpeg {' '.join(final_command[1:])}\n"
240
  )
241
  subprocess.run(final_command, cwd=temp_dir)
242
+ generated_command = f"### Generated Command\n```bash\nffmpeg {' '.join(args[1:])} -y output.mp4\n```"
 
 
243
  return output_file_path, gr.update(value=generated_command)
244
  except Exception as e:
245
  attempts += 1
 
252
  gr.Markdown(
253
  """
254
  # 🏞 AI Video Composer
255
+ Compose new videos from your assets using natural language. Add video, image and audio assets and let [Qwen2.5-Coder](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct) generate a new video for you (using FFMPEG).
256
  """,
257
  elem_id="header",
258
  )
 
264
  file_types=allowed_medias,
265
  )
266
  user_prompt = gr.Textbox(
267
+ placeholder="I want to convert to a gif under 15mb",
268
  label="Instructions",
269
  )
270
  btn = gr.Button("Run")
271
  with gr.Accordion("Parameters", open=False):
 
 
 
 
 
272
  top_p = gr.Slider(
273
  minimum=-0,
274
  maximum=1.0,
 
293
 
294
  btn.click(
295
  fn=update,
296
+ inputs=[user_files, user_prompt, top_p, temperature],
297
  outputs=[generated_video, generated_command],
298
  )
299
  with gr.Row():
 
304
  "Use the image as the background with a waveform visualization for the audio positioned in center of the video.",
305
  0.7,
306
  0.1,
 
 
 
 
 
 
 
 
 
 
 
 
307
  ],
308
  [
309
  [
310
+ "./examples/cat8.jpeg",
311
  "./examples/cat1.jpeg",
312
  "./examples/cat2.jpeg",
313
  "./examples/cat3.jpeg",
314
  "./examples/cat4.jpeg",
315
  "./examples/cat5.jpeg",
316
  "./examples/cat6.jpeg",
317
+ "./examples/cat7.jpeg",
318
  "./examples/heat-wave.mp3",
319
  ],
320
+ "Generate an MP4 slideshow where each photo appears for 2 seconds, using the provided audio as soundtrack.",
321
+ 0.7,
322
+ 0.1,
323
+ ],
324
+ [
325
+ ["./examples/waterfall-overlay.png", "./examples/waterfall.mp4"],
326
+ "Add the overlay to the video.",
327
+ 0.7,
328
+ 0.1,
329
+ ],
330
+ [
331
+ ["./examples/example.mp4"],
332
+ "Make this video 10 times faster",
333
  0.7,
334
  0.1,
 
 
 
 
 
335
  ],
336
  ],
337
+ inputs=[user_files, user_prompt, top_p, temperature],
338
  outputs=[generated_video, generated_command],
339
  fn=update,
340
  run_on_click=True,
requirements.txt CHANGED
@@ -1,3 +1,3 @@
1
  openai>=1.55.0
2
- gradio==5.9.1
3
  moviepy==1
 
1
  openai>=1.55.0
2
+ gradio==5.6.0
3
  moviepy==1