vsrinivas committed on
Commit
4eab1ba
·
verified ·
1 Parent(s): b17793d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +155 -145
app.py CHANGED
@@ -186,93 +186,100 @@ def synthesize_speech(video, source_language,target_language):
186
  target_language = target_language)
187
  return dub_video
188
 
189
- # This function handles the processing when any participant speaks
190
- def process_speaker(video, speaker_idx, n_participants, *language_list):
191
- transcript = speech_to_text(video)
192
 
193
- # Create outputs for each participant
194
- outputs = []
195
- global meeting_texts
196
- def process_translation_dubbing(i):
197
- if i != speaker_idx:
198
- participant_language = language_codes[language_list[i]]
199
- speaker_language = language_codes[language_list[speaker_idx]]
200
- translated_text = translate_text(transcript, speaker_language, participant_language)
201
- dubbed_video = synthesize_speech(video, speaker_language, participant_language)
202
- return translated_text, dubbed_video
203
- return None, None
204
 
205
- with concurrent.futures.ThreadPoolExecutor() as executor:
206
- futures = [executor.submit(process_translation_dubbing, i) for i in range(n_participants)]
207
- results = [f.result() for f in futures]
208
 
209
- for i, (translated_text, dubbed_video) in enumerate(results):
210
- if i == speaker_idx:
211
- outputs.insert(0, transcript)
212
- else:
213
- outputs.append(translated_text)
214
- outputs.append(dubbed_video)
215
- if speaker_idx == 0:
216
- meeting_texts.append({f"Speaker_{speaker_idx+1}":outputs[0]})
217
- else:
218
- meeting_texts.append({f"Speaker_{speaker_idx+1}":outputs[1]})
219
 
220
- print(len(outputs))
221
- print(outputs)
222
- print('meeting_texts: ',meeting_texts)
223
- return outputs
224
 
225
- def create_participant_row(i, language_choices):
226
- """Creates the UI for a single participant."""
227
- with gr.Row():
228
- video_input = gr.Video(label=f"Participant {i+1} Video", interactive=True)
229
- language_dropdown = gr.Dropdown(choices=language_choices, label=f"Participant {i+1} Language", value=language_choices[i])
230
- transcript_output = gr.Textbox(label=f"Participant {i+1} Transcript")
231
- translated_text = gr.Textbox(label="Speaker's Translated Text")
232
- dubbed_video = gr.Video(label="Speaker's Dubbed Video")
233
- return video_input, language_dropdown, transcript_output, translated_text, dubbed_video
234
 
235
- # Main dynamic Gradio interface
236
- def create_gradio_interface(n_participants, language_choices):
237
- with gr.Blocks() as demo:
238
- gr.Markdown("""# LinguaPolis: Bridging Languages, Uniting Teams Globally - Multilingual Conference Call Simulation
239
- ## Record your video or upload your video and press the corresponding Submit button at the bottom""")
240
 
241
- video_inputs = []
242
- language_dropdowns = []
243
- transcript_outputs = []
244
- translated_texts = []
245
- dubbed_videos = []
246
 
247
- clear_button = gr.Button("Clear All")
248
 
249
- # Create a row for each participant
250
- for i in range(n_participants):
251
- video_input, language_dropdown, transcript_output, translated_text, dubbed_video = create_participant_row(i, language_choices)
252
- video_inputs.append(video_input)
253
- language_dropdowns.append(language_dropdown)
254
- transcript_outputs.append(transcript_output)
255
- translated_texts.append(translated_text)
256
- dubbed_videos.append(dubbed_video)
257
 
258
- # Create dynamic processing buttons for each participant
259
- for i in range(n_participants):
260
- gr.Button(f"Submit Speaker {i+1}'s Speech").click(
261
- process_speaker,
262
- [video_inputs[i], gr.State(i), gr.State(n_participants)] + [language_dropdowns[j] for j in range(n_participants)],
263
- [transcript_outputs[i]] + [k for j in zip(translated_texts[:i]+translated_texts[i+1:], dubbed_videos[:i]+dubbed_videos[i+1:]) for k in j]
264
- )
265
- minutes = gr.Textbox(label="Minutes of Meeting")
266
- gr.Button(f"Generate Minutes of meeting").click(summarize, None, minutes)
267
 
268
- # Clear button to reset inputs and outputs
269
- clear_button.click(clear_all, None, [*video_inputs, *transcript_outputs, *translated_texts, *dubbed_videos, minutes])
 
 
 
 
 
 
 
 
 
 
270
 
271
- # Launch with .queue() to keep it running properly in Jupyter
272
- demo.queue().launch(debug=True, share=True)
273
 
274
 
275
- create_gradio_interface(n_participants, language_choices)
276
 
277
  # def create_dub_from_file(
278
  # input_file_path: str,
@@ -373,92 +380,95 @@ create_gradio_interface(n_participants, language_choices)
373
  # return dub_video
374
 
375
 
376
- # # Update process_speaker function to accept and return meeting_texts
377
- # def process_speaker(video, speaker_idx, n_participants, meeting_texts, *language_list):
378
- # transcript = speech_to_text(video)
379
 
380
- # # Create outputs for each participant
381
- # outputs = []
382
 
383
- # def process_translation_dubbing(i):
384
- # if i != speaker_idx:
385
- # participant_language = language_codes[language_list[i]]
386
- # speaker_language = language_codes[language_list[speaker_idx]]
387
- # translated_text = translate_text(transcript, speaker_language, participant_language)
388
- # dubbed_video = synthesize_speech(video, speaker_language, participant_language)
389
- # return translated_text, dubbed_video
390
- # return None, None
391
 
392
- # with concurrent.futures.ThreadPoolExecutor() as executor:
393
- # futures = [executor.submit(process_translation_dubbing, i) for i in range(n_participants)]
394
- # results = [f.result() for f in futures]
395
 
396
- # for i, (translated_text, dubbed_video) in enumerate(results):
397
- # if i == speaker_idx:
398
- # outputs.insert(0, transcript)
399
- # else:
400
- # outputs.append(translated_text)
401
- # outputs.append(dubbed_video)
402
 
403
- # if speaker_idx == 0:
404
- # meeting_texts.append({f"Speaker_{speaker_idx+1}": outputs[0]})
405
- # else:
406
- # meeting_texts.append({f"Speaker_{speaker_idx+1}": outputs[1]})
407
 
408
- # print("meeting_texts:", meeting_texts)
409
- # print('outputs:', outputs)
410
- # outputs.append(meeting_texts)
411
- # return outputs
 
 
 
412
 
413
 
414
- # def create_participant_row(i, language_choices):
415
- # """Creates the UI for a single participant."""
416
- # with gr.Row():
417
- # video_input = gr.Video(label=f"Participant {i+1} Video", interactive=True)
418
- # language_dropdown = gr.Dropdown(choices=language_choices, label=f"Participant {i+1} Language", value=language_choices[i])
419
- # transcript_output = gr.Textbox(label=f"Participant {i+1} Transcript")
420
- # translated_text = gr.Textbox(label="Speaker's Translated Text")
421
- # dubbed_video = gr.Video(label="Speaker's Dubbed Video")
422
- # return video_input, language_dropdown, transcript_output, translated_text, dubbed_video
423
 
424
 
425
- # # Modify the Gradio interface to manage the meeting_texts between function calls
426
- # def create_gradio_interface(n_participants, language_choices):
427
- # with gr.Blocks() as demo:
428
- # gr.Markdown("""# LinguaPolis: Bridging Languages, Uniting Teams Globally - Multilingual Conference Call Simulation
429
- # ## Record your video or upload your video and press the corresponding Submit button at the bottom""")
430
 
431
- # video_inputs = []
432
- # language_dropdowns = []
433
- # transcript_outputs = []
434
- # translated_texts = []
435
- # dubbed_videos = []
436
 
437
- # clear_button = gr.Button("Clear All")
438
- # meeting_texts = gr.State([]) # Initialize meeting_texts as a Gradio State
439
 
440
- # # Create a row for each participant
441
- # for i in range(n_participants):
442
- # video_input, language_dropdown, transcript_output, translated_text, dubbed_video = create_participant_row(i, language_choices)
443
- # video_inputs.append(video_input)
444
- # language_dropdowns.append(language_dropdown)
445
- # transcript_outputs.append(transcript_output)
446
- # translated_texts.append(translated_text)
447
- # dubbed_videos.append(dubbed_video)
448
 
449
- # # Create dynamic processing buttons for each participant
450
- # for i in range(n_participants):
451
- # gr.Button(f"Submit Speaker {i+1}'s Speech").click(
452
- # process_speaker,
453
- # [video_inputs[i], gr.State(i), gr.State(n_participants), meeting_texts] + [language_dropdowns[j] for j in range(n_participants)],
454
- # [transcript_outputs[i]] + [k for j in zip(translated_texts[:i]+translated_texts[i+1:], dubbed_videos[:i]+dubbed_videos[i+1:]) for k in j] + [meeting_texts]
455
- # )
456
 
457
- # minutes = gr.Textbox(label="Minutes of Meeting")
458
- # gr.Button(f"Generate Minutes of meeting").click(summarize, [meeting_texts], minutes)
459
 
460
- # # Clear button to reset inputs and outputs
461
- # clear_button.click(clear_all, None, [*video_inputs, *transcript_outputs, *translated_texts, *dubbed_videos, minutes, meeting_texts])
462
 
463
- # demo.launch(debug=True, share=True)
464
- # create_gradio_interface(4, language_choices)
 
186
  target_language = target_language)
187
  return dub_video
188
 
189
+ # # This function handles the processing when any participant speaks
190
+ # def process_speaker(video, speaker_idx, n_participants, *language_list):
191
+ # transcript = speech_to_text(video)
192
 
193
+ # # Create outputs for each participant
194
+ # outputs = []
195
+ # global meeting_texts
196
+ # def process_translation_dubbing(i):
197
+ # if i != speaker_idx:
198
+ # participant_language = language_codes[language_list[i]]
199
+ # speaker_language = language_codes[language_list[speaker_idx]]
200
+ # translated_text = translate_text(transcript, speaker_language, participant_language)
201
+ # dubbed_video = synthesize_speech(video, speaker_language, participant_language)
202
+ # return translated_text, dubbed_video
203
+ # return None, None
204
 
205
+ # with concurrent.futures.ThreadPoolExecutor() as executor:
206
+ # futures = [executor.submit(process_translation_dubbing, i) for i in range(n_participants)]
207
+ # results = [f.result() for f in futures]
208
 
209
+ # for i, (translated_text, dubbed_video) in enumerate(results):
210
+ # if i == speaker_idx:
211
+ # outputs.insert(0, transcript)
212
+ # else:
213
+ # outputs.append(translated_text)
214
+ # outputs.append(dubbed_video)
215
+ # if speaker_idx == 0:
216
+ # meeting_texts.append({f"Speaker_{speaker_idx+1}":outputs[0]})
217
+ # else:
218
+ # meeting_texts.append({f"Speaker_{speaker_idx+1}":outputs[1]})
219
 
220
+ # print(len(outputs))
221
+ # print(outputs)
222
+ # print('meeting_texts: ',meeting_texts)
223
+ # return outputs
224
 
225
+ # def create_participant_row(i, language_choices):
226
+ # """Creates the UI for a single participant."""
227
+ # with gr.Row():
228
+ # video_input = gr.Video(label=f"Participant {i+1} Video", interactive=True)
229
+ # language_dropdown = gr.Dropdown(choices=language_choices, label=f"Participant {i+1} Language", value=language_choices[i])
230
+ # transcript_output = gr.Textbox(label=f"Participant {i+1} Transcript")
231
+ # translated_text = gr.Textbox(label="Speaker's Translated Text")
232
+ # dubbed_video = gr.Video(label="Speaker's Dubbed Video")
233
+ # return video_input, language_dropdown, transcript_output, translated_text, dubbed_video
234
 
235
+ # # Main dynamic Gradio interface
236
+ # def create_gradio_interface(n_participants, language_choices):
237
+ # with gr.Blocks() as demo:
238
+ # gr.Markdown("""# LinguaPolis: Bridging Languages, Uniting Teams Globally - Multilingual Conference Call Simulation
239
+ # ## Record your video or upload your video and press the corresponding Submit button at the bottom""")
240
 
241
+ # video_inputs = []
242
+ # language_dropdowns = []
243
+ # transcript_outputs = []
244
+ # translated_texts = []
245
+ # dubbed_videos = []
246
 
247
+ # clear_button = gr.Button("Clear All")
248
 
249
+ # # Create a row for each participant
250
+ # for i in range(n_participants):
251
+ # video_input, language_dropdown, transcript_output, translated_text, dubbed_video = create_participant_row(i, language_choices)
252
+ # video_inputs.append(video_input)
253
+ # language_dropdowns.append(language_dropdown)
254
+ # transcript_outputs.append(transcript_output)
255
+ # translated_texts.append(translated_text)
256
+ # dubbed_videos.append(dubbed_video)
257
 
258
+ # # Create dynamic processing buttons for each participant
259
+ # for i in range(n_participants):
260
+ # gr.Button(f"Submit Speaker {i+1}'s Speech").click(
261
+ # process_speaker,
262
+ # [video_inputs[i], gr.State(i), gr.State(n_participants)] + [language_dropdowns[j] for j in range(n_participants)],
263
+ # [transcript_outputs[i]] + [k for j in zip(translated_texts[:i]+translated_texts[i+1:], dubbed_videos[:i]+dubbed_videos[i+1:]) for k in j]
264
+ # )
265
+ # minutes = gr.Textbox(label="Minutes of Meeting")
266
+ # gr.Button(f"Generate Minutes of meeting").click(summarize, None, minutes)
267
 
268
+ # # Clear button to reset inputs and outputs
269
+ # clear_button.click(clear_all, None, [*video_inputs, *transcript_outputs, *translated_texts, *dubbed_videos, minutes])
270
+
271
+ # # Launch with .queue() to keep it running properly in Jupyter
272
+ # demo.queue().launch(debug=True, share=True)
273
+
274
+
275
+ # create_gradio_interface(n_participants, language_choices)
276
+
277
+
278
+
279
+
280
 
 
 
281
 
282
 
 
283
 
284
  # def create_dub_from_file(
285
  # input_file_path: str,
 
380
  # return dub_video
381
 
382
 
383
+ # Update process_speaker function to accept and return meeting_texts
384
def process_speaker(video, speaker_idx, n_participants, meeting_texts, *language_list):
    """Transcribe one speaker's video and fan it out to every other participant.

    Args:
        video: The speaker's video; handed to ``speech_to_text`` and
            ``synthesize_speech``.
        speaker_idx: Zero-based index of the participant who spoke.
        n_participants: Number of participants in the call.
        meeting_texts: List of ``{"Speaker_<n>": text}`` entries collected so
            far. It is appended to in place and also returned as the last
            output.
        *language_list: One language name per participant; each is looked up
            in ``language_codes``.

    Returns:
        A flat list: the transcript first, then a
        ``translated_text, dubbed_video`` pair for every non-speaking
        participant in index order, then ``meeting_texts``.
    """
    transcript = speech_to_text(video)

    def translate_and_dub(listener_idx):
        # The speaker gets no translation or dub of their own speech.
        if listener_idx == speaker_idx:
            return None, None
        listener_code = language_codes[language_list[listener_idx]]
        speaker_code = language_codes[language_list[speaker_idx]]
        text = translate_text(transcript, speaker_code, listener_code)
        clip = synthesize_speech(video, speaker_code, listener_code)
        return text, clip

    # One task per participant so translations and dubs run concurrently;
    # results are collected in participant order.
    with concurrent.futures.ThreadPoolExecutor() as pool:
        pending = [pool.submit(translate_and_dub, idx) for idx in range(n_participants)]
        results = [task.result() for task in pending]

    outputs = []
    for idx, pair in enumerate(results):
        if idx == speaker_idx:
            # The transcript always leads the output list.
            outputs.insert(0, transcript)
            continue
        outputs.extend(pair)

    # The first participant's entry is their own transcript; for any other
    # speaker, outputs[1] is the text translated for participant index 0.
    minutes_text = outputs[0] if speaker_idx == 0 else outputs[1]
    meeting_texts.append({f"Speaker_{speaker_idx+1}": minutes_text})

    print(len(outputs))
    print(outputs)
    print("meeting_texts:", meeting_texts)
    print('outputs:', outputs)
    # The updated minutes list travels back as the final output value.
    outputs.append(meeting_texts)
    print(len(outputs))
    return outputs
422
 
423
 
424
def create_participant_row(i, language_choices):
    """Build the row of widgets for participant ``i`` and return them.

    Args:
        i: Zero-based participant index; used in the labels (shown as
            ``i + 1``) and to pick the default language.
        language_choices: Language names offered in the dropdown;
            ``language_choices[i]`` is preselected.

    Returns:
        A tuple ``(video_input, language_dropdown, transcript_output,
        translated_text, dubbed_video)`` of the created components.
    """
    with gr.Row():
        video = gr.Video(label=f"Participant {i+1} Video", interactive=True)
        language = gr.Dropdown(
            choices=language_choices,
            label=f"Participant {i+1} Language",
            value=language_choices[i],
        )
        transcript = gr.Textbox(label=f"Participant {i+1} Transcript")
        # These two show what the current speaker said, rendered for this participant.
        translation = gr.Textbox(label="Speaker's Translated Text")
        dub = gr.Video(label="Speaker's Dubbed Video")
        return video, language, transcript, translation, dub
433
 
434
 
435
+ # Modify the Gradio interface to manage the meeting_texts between function calls
436
def create_gradio_interface(n_participants, language_choices):
    """Assemble the conference-call demo UI and launch it.

    Creates one row of widgets per participant, one submit button per
    participant wired to ``process_speaker``, a minutes box fed by
    ``summarize``, and a "Clear All" button wired to ``clear_all``.

    Args:
        n_participants: Number of participant rows and submit buttons.
        language_choices: Language names passed to each participant row.
    """
    with gr.Blocks() as demo:
        gr.Markdown(
            "# LinguaPolis: Bridging Languages, Uniting Teams Globally - Multilingual Conference Call Simulation\n"
            "## Record your video or upload your video and press the corresponding Submit button at the bottom"
        )

        clear_button = gr.Button("Clear All")
        # Gradio State carrying the minutes entries between handler calls.
        meeting_texts = gr.State([])

        # One row per participant, then split the widgets by kind.
        rows = [create_participant_row(i, language_choices) for i in range(n_participants)]
        video_inputs = [row[0] for row in rows]
        language_dropdowns = [row[1] for row in rows]
        transcript_outputs = [row[2] for row in rows]
        translated_texts = [row[3] for row in rows]
        dubbed_videos = [row[4] for row in rows]

        # One submit button per participant.
        for i in range(n_participants):
            submit = gr.Button(f"Submit Speaker {i+1}'s Speech")
            handler_inputs = [
                video_inputs[i],
                gr.State(i),
                gr.State(n_participants),
                meeting_texts,
                *language_dropdowns,
            ]
            # Output order mirrors process_speaker's return value: the
            # speaker's transcript, then a (text, video) pair for every other
            # participant, then the minutes state.
            handler_outputs = [transcript_outputs[i]]
            for j in range(n_participants):
                if j != i:
                    handler_outputs += [translated_texts[j], dubbed_videos[j]]
            handler_outputs.append(meeting_texts)
            submit.click(process_speaker, handler_inputs, handler_outputs)

        minutes = gr.Textbox(label="Minutes of Meeting")
        gr.Button("Generate Minutes of meeting").click(summarize, [meeting_texts], minutes)

        # Clear button to reset inputs and outputs
        clear_targets = (
            video_inputs
            + transcript_outputs
            + translated_texts
            + dubbed_videos
            + [minutes, meeting_texts]
        )
        clear_button.click(clear_all, None, clear_targets)

    demo.launch(debug=True, share=True)
474
# Build and launch the demo for a four-participant call.
create_gradio_interface(n_participants=4, language_choices=language_choices)