vsrinivas committed
Commit b17793d · verified · 1 Parent(s): 29791a1

Update app.py

Files changed (1)
app.py +217 -217
app.py CHANGED
@@ -86,194 +86,6 @@ def download_dubbed_file(dubbing_id: str, language_code: str) -> str:
 
     return file_path
 
- # def create_dub_from_file(
- #     input_file_path: str,
- #     file_format: str,
- #     source_language: str,
- #     target_language: str,
- # ):
- # # ) -> Optional[str]:
- #     """
- #     Dubs an audio or video file from one language to another and saves the output.
- #     Args:
- #         input_file_path (str): The file path of the audio or video to dub.
- #         file_format (str): The file format of the input file.
- #         source_language (str): The language of the input file.
- #         target_language (str): The target language to dub into.
- #     Returns:
- #         Optional[str]: The file path of the dubbed file or None if operation failed.
- #     """
- #     if not os.path.isfile(input_file_path):
- #         raise FileNotFoundError(f"The input file does not exist: {input_file_path}")
-
- #     with open(input_file_path, "rb") as audio_file:
- #         response = client.dubbing.dub_a_video_or_an_audio_file(
- #             file=(os.path.basename(input_file_path), audio_file, file_format), # Optional file
- #             target_lang=target_language, # The target language to dub the content into. Can be none if dubbing studio editor is enabled and running manual mode
- #             # mode="automatic", # automatic or manual.
- #             source_lang=source_language, # Source language
- #             num_speakers=1, # Number of speakers to use for the dubbing.
- #             watermark=True, # Whether to apply watermark to the output video.
- #         )
-
- #     # rest of the code
- #     dubbing_id = response.dubbing_id
- #     if wait_for_dubbing_completion(dubbing_id):
- #         output_file_path = download_dubbed_file(dubbing_id, target_language)
- #         return output_file_path
- #     else:
- #         return None
-
-
- # def summarize(meeting_texts=meeting_texts):
- #     meeting_texts = ', '.join([f"{k}: {v}" for i in meeting_texts for k, v in i.items()])
- #     meeting_date_time = str(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
- #     # meeting_texts = meeting_date_time + '\n' + meeting_texts
-
- #     # meeting_conversation_processed ='\n'.join(mt)
- #     # print("M:", session_conversation_processed)
-
- #     minutes_of_meeting = ""
- #     for chunk in AI71(AI71_API_KEY.strip()).chat.completions.create(
- #         model="tiiuae/falcon-180b-chat",
- #         messages=[
- #             {"role": "system", "content": f"""You are an expereiced Secretary who can summarize meeting discussions into minutes of meeting.
- #             Summarize the meetings discussions provided as Speakerwise conversation.
- #             Strictly consider only the context given in user content {meeting_texts} for summarization.
- #             Ensure to mention the title as 'Minutes of Meeting held on {meeting_date_time} and present the summary with better viewing format and title in bold letters"""},
- #             {"role": "user", "content": meeting_texts},
- #         ],
- #         stream=True,
- #     ):
- #         if chunk.choices[0].delta.content:
- #             summary = chunk.choices[0].delta.content
- #             minutes_of_meeting += summary
- #     minutes_of_meeting = minutes_of_meeting.replace('User:', '').strip()
- #     print("\n")
- #     print("minutes_of_meeting:", minutes_of_meeting)
- #     return minutes_of_meeting
-
-
- # # Placeholder function for speech to text conversion
- # def speech_to_text(video):
- #     print(video, type(video))
- #     print('Started transcribing')
- #     audio = AudioSegment.from_file(video)
- #     audio.export('temp.wav', format="wav")
-
- #     # transcript = transcriber.transcribe(video).text
- #     # transcript = transcriber.transcribe(video).text
- #     transcript = transcriber("temp.wav").split("'")[1].strip()
-
- #     print('transcript:', transcript)
- #     return transcript
-
- # # Placeholder function for translating text
- # def translate_text(text, source_language,target_language):
- #     tokenizer.src_lang = source_language
- #     encoded_ln = tokenizer(text, return_tensors="pt")
- #     generated_tokens = translator.generate(**encoded_ln, forced_bos_token_id=tokenizer.get_lang_id(target_language))
- #     translated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
- #     print('translated_text:', translated_text)
- #     return translated_text
-
- # # Placeholder function for dubbing (text-to-speech in another language)
- # def synthesize_speech(video, source_language,target_language):
- #     print('Started dubbing')
- #     dub_video = create_dub_from_file(input_file_path = video,
- #                                      file_format = 'audio/mpeg',
- #                                      source_language = source_language,
- #                                      target_language = target_language)
- #     return dub_video
-
- # # This function handles the processing when any participant speaks
- # def process_speaker(video, speaker_idx, n_participants, *language_list):
- #     transcript = speech_to_text(video)
-
- #     # Create outputs for each participant
- #     outputs = []
- #     global meeting_texts
- #     def process_translation_dubbing(i):
- #         if i != speaker_idx:
- #             participant_language = language_codes[language_list[i]]
- #             speaker_language = language_codes[language_list[speaker_idx]]
- #             translated_text = translate_text(transcript, speaker_language, participant_language)
- #             dubbed_video = synthesize_speech(video, speaker_language, participant_language)
- #             return translated_text, dubbed_video
- #         return None, None
-
- #     with concurrent.futures.ThreadPoolExecutor() as executor:
- #         futures = [executor.submit(process_translation_dubbing, i) for i in range(n_participants)]
- #         results = [f.result() for f in futures]
-
- #     for i, (translated_text, dubbed_video) in enumerate(results):
- #         if i == speaker_idx:
- #             outputs.insert(0, transcript)
- #         else:
- #             outputs.append(translated_text)
- #             outputs.append(dubbed_video)
- #     if speaker_idx == 0:
- #         meeting_texts.append({f"Speaker_{speaker_idx+1}":outputs[0]})
- #     else:
- #         meeting_texts.append({f"Speaker_{speaker_idx+1}":outputs[1]})
-
- #     print(len(outputs))
- #     print(outputs)
- #     print('meeting_texts: ',meeting_texts)
- #     return outputs
-
- # def create_participant_row(i, language_choices):
- #     """Creates the UI for a single participant."""
- #     with gr.Row():
- #         video_input = gr.Video(label=f"Participant {i+1} Video", interactive=True)
- #         language_dropdown = gr.Dropdown(choices=language_choices, label=f"Participant {i+1} Language", value=language_choices[i])
- #         transcript_output = gr.Textbox(label=f"Participant {i+1} Transcript")
- #         translated_text = gr.Textbox(label="Speaker's Translated Text")
- #         dubbed_video = gr.Video(label="Speaker's Dubbed Video")
- #     return video_input, language_dropdown, transcript_output, translated_text, dubbed_video
-
- # # Main dynamic Gradio interface
- # def create_gradio_interface(n_participants, language_choices):
- #     with gr.Blocks() as demo:
- #         gr.Markdown("""# LinguaPolis: Bridging Languages, Uniting Teams Globally - Multilingual Conference Call Simulation
- #         ## Record your video or upload your video and press the corresponding Submit button at the bottom""")
-
- #         video_inputs = []
- #         language_dropdowns = []
- #         transcript_outputs = []
- #         translated_texts = []
- #         dubbed_videos = []
-
- #         clear_button = gr.Button("Clear All")
-
- #         # Create a row for each participant
- #         for i in range(n_participants):
- #             video_input, language_dropdown, transcript_output, translated_text, dubbed_video = create_participant_row(i, language_choices)
- #             video_inputs.append(video_input)
- #             language_dropdowns.append(language_dropdown)
- #             transcript_outputs.append(transcript_output)
- #             translated_texts.append(translated_text)
- #             dubbed_videos.append(dubbed_video)
-
- #         # Create dynamic processing buttons for each participant
- #         for i in range(n_participants):
- #             gr.Button(f"Submit Speaker {i+1}'s Speech").click(
- #                 process_speaker,
- #                 [video_inputs[i], gr.State(i), gr.State(n_participants)] + [language_dropdowns[j] for j in range(n_participants)],
- #                 [transcript_outputs[i]] + [k for j in zip(translated_texts[:i]+translated_texts[i+1:], dubbed_videos[:i]+dubbed_videos[i+1:]) for k in j]
- #             )
- #         minutes = gr.Textbox(label="Minutes of Meeting")
- #         gr.Button(f"Generate Minutes of meeting").click(summarize, None, minutes)
-
- #         # Clear button to reset inputs and outputs
- #         clear_button.click(clear_all, None, [*video_inputs, *transcript_outputs, *translated_texts, *dubbed_videos, minutes])
-
- #     # Launch with .queue() to keep it running properly in Jupyter
- #     demo.queue().launch(debug=True, share=True)
-
-
- # create_gradio_interface(n_participants, language_choices)
-
 def create_dub_from_file(
     input_file_path: str,
     file_format: str,
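
The active `create_dub_from_file` kept by this commit calls `wait_for_dubbing_completion`, which is defined elsewhere in app.py and not shown in this diff. Below is a minimal sketch of such a helper, assuming the ElevenLabs SDK's `dubbing.get_dubbing_project_metadata` endpoint and its `status` field; the names follow the official ElevenLabs dubbing example, so treat them as assumptions rather than this repo's exact code:

```python
import time

# Hypothetical reconstruction: poll the dubbing job until it finishes or fails.
# `client` is assumed to be an already-configured ElevenLabs client instance.
def wait_for_dubbing_completion(dubbing_id: str) -> bool:
    MAX_ATTEMPTS = 120
    CHECK_INTERVAL = 10  # seconds between status checks
    for _ in range(MAX_ATTEMPTS):
        metadata = client.dubbing.get_dubbing_project_metadata(dubbing_id)
        if metadata.status == "dubbed":
            return True  # output file is ready to download
        elif metadata.status == "dubbing":
            time.sleep(CHECK_INTERVAL)  # still rendering; check again later
        else:
            return False  # "failed" or any other terminal status
    return False  # timed out after MAX_ATTEMPTS polls
```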
@@ -313,20 +125,22 @@ def create_dub_from_file(
         return None
 
 
- # Modify the summarize function to accept and return meeting_texts
- def summarize(meeting_texts):
+ def summarize(meeting_texts=meeting_texts):
     meeting_texts = ', '.join([f"{k}: {v}" for i in meeting_texts for k, v in i.items()])
     meeting_date_time = str(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
-     # meeting_texts_str = meeting_date_time + '\n' + mt
+     # meeting_texts = meeting_date_time + '\n' + meeting_texts
+
+     # meeting_conversation_processed ='\n'.join(mt)
+     # print("M:", session_conversation_processed)
 
     minutes_of_meeting = ""
     for chunk in AI71(AI71_API_KEY.strip()).chat.completions.create(
         model="tiiuae/falcon-180b-chat",
         messages=[
-             {"role": "system", "content": f"""You are an experienced Secretary who can summarize meeting discussions into minutes of meeting.
+             {"role": "system", "content": f"""You are an expereiced Secretary who can summarize meeting discussions into minutes of meeting.
             Summarize the meetings discussions provided as Speakerwise conversation.
-             Strictly consider only the context given in user content for summarization.
-             Ensure to mention the title as 'Minutes of Meeting held on {meeting_date_time}' and present the summary with better viewing format and title in bold letters."""},
+             Strictly consider only the context given in user content {meeting_texts} for summarization.
+             Ensure to mention the title as 'Minutes of Meeting held on {meeting_date_time} and present the summary with better viewing format and title in bold letters"""},
             {"role": "user", "content": meeting_texts},
         ],
         stream=True,
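
The signature change above is the crux of this hunk: `def summarize(meeting_texts=meeting_texts):` binds the module-level list once, at definition time, while the replaced `def summarize(meeting_texts):` expected the caller (a `gr.State` input) to supply it. A small self-contained sketch, with illustrative names only, of why the default-argument form still sees later appends:

```python
meeting_texts = []  # module-level accumulator, appended to elsewhere

def summarize(texts=meeting_texts):
    # The default was evaluated once, at `def` time, but it is a reference to
    # the same list object, so in-place mutations made later are visible here.
    return ', '.join(f"{k}: {v}" for entry in texts for k, v in entry.items())

meeting_texts.append({"Speaker_1": "Hello, everyone."})
meeting_texts.append({"Speaker_2": "Hi!"})
print(summarize())  # -> Speaker_1: Hello, everyone., Speaker_2: Hi!
```

The flip side is that the default is shared across every call and every user of the app; only in-place mutation of the list is picked up, never a rebinding of the global name.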
@@ -334,8 +148,8 @@ def summarize(meeting_texts):
         if chunk.choices[0].delta.content:
             summary = chunk.choices[0].delta.content
             minutes_of_meeting += summary
-
     minutes_of_meeting = minutes_of_meeting.replace('User:', '').strip()
+     print("\n")
     print("minutes_of_meeting:", minutes_of_meeting)
     return minutes_of_meeting
 
@@ -372,14 +186,13 @@ def synthesize_speech(video, source_language,target_language):
                                      target_language = target_language)
     return dub_video
 
-
- # Update process_speaker function to accept and return meeting_texts
- def process_speaker(video, speaker_idx, n_participants, meeting_texts, *language_list):
+ # This function handles the processing when any participant speaks
+ def process_speaker(video, speaker_idx, n_participants, *language_list):
     transcript = speech_to_text(video)
 
     # Create outputs for each participant
     outputs = []
-
+     global meeting_texts
     def process_translation_dubbing(i):
         if i != speaker_idx:
             participant_language = language_codes[language_list[i]]
@@ -399,18 +212,16 @@ def process_speaker(video, speaker_idx, n_participants, meeting_texts, *language_list):
         else:
             outputs.append(translated_text)
             outputs.append(dubbed_video)
-
     if speaker_idx == 0:
-         meeting_texts.append({f"Speaker_{speaker_idx+1}": outputs[0]})
+         meeting_texts.append({f"Speaker_{speaker_idx+1}":outputs[0]})
     else:
-         meeting_texts.append({f"Speaker_{speaker_idx+1}": outputs[1]})
+         meeting_texts.append({f"Speaker_{speaker_idx+1}":outputs[1]})
 
-     print("meeting_texts:", meeting_texts)
-     print('outputs:', outputs)
-     outputs.append(meeting_texts)
+     print(len(outputs))
+     print(outputs)
+     print('meeting_texts: ',meeting_texts)
     return outputs
 
-
 def create_participant_row(i, language_choices):
     """Creates the UI for a single participant."""
     with gr.Row():
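
`process_speaker` returns a flat list: the speaker's transcript first, then a (translated text, dubbed video) pair for each listener. The `.click` wiring later in this diff consumes it with a zip-flatten comprehension; here is a small stand-alone illustration of that ordering, with plain strings standing in for the Gradio components:

```python
translated_texts = ["t1", "t2", "t3", "t4"]  # one Textbox per participant
dubbed_videos = ["v1", "v2", "v3", "v4"]     # one Video per participant
i = 1  # index of the participant who spoke

# Skip the speaker's own slots, then interleave text/video pairs in order.
outputs = ["transcript_box_1"] + [
    k
    for j in zip(translated_texts[:i] + translated_texts[i + 1:],
                 dubbed_videos[:i] + dubbed_videos[i + 1:])
    for k in j
]
print(outputs)  # ['transcript_box_1', 't1', 'v1', 't3', 'v3', 't4', 'v4']
```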
@@ -421,8 +232,7 @@ def create_participant_row(i, language_choices):
         dubbed_video = gr.Video(label="Speaker's Dubbed Video")
     return video_input, language_dropdown, transcript_output, translated_text, dubbed_video
 
-
- # Modify the Gradio interface to manage the meeting_texts between function calls
+ # Main dynamic Gradio interface
 def create_gradio_interface(n_participants, language_choices):
     with gr.Blocks() as demo:
         gr.Markdown("""# LinguaPolis: Bridging Languages, Uniting Teams Globally - Multilingual Conference Call Simulation
@@ -435,7 +245,6 @@ def create_gradio_interface(n_participants, language_choices):
         dubbed_videos = []
 
         clear_button = gr.Button("Clear All")
-         meeting_texts = gr.State([]) # Initialize meeting_texts as a Gradio State
 
         # Create a row for each participant
         for i in range(n_participants):
@@ -450,15 +259,206 @@ def create_gradio_interface(n_participants, language_choices):
         for i in range(n_participants):
             gr.Button(f"Submit Speaker {i+1}'s Speech").click(
                 process_speaker,
-                 [video_inputs[i], gr.State(i), gr.State(n_participants), meeting_texts] + [language_dropdowns[j] for j in range(n_participants)],
-                 [transcript_outputs[i]] + [k for j in zip(translated_texts[:i]+translated_texts[i+1:], dubbed_videos[:i]+dubbed_videos[i+1:]) for k in j] + [meeting_texts]
-             )
-
+                 [video_inputs[i], gr.State(i), gr.State(n_participants)] + [language_dropdowns[j] for j in range(n_participants)],
+                 [transcript_outputs[i]] + [k for j in zip(translated_texts[:i]+translated_texts[i+1:], dubbed_videos[:i]+dubbed_videos[i+1:]) for k in j]
+             )
         minutes = gr.Textbox(label="Minutes of Meeting")
-         gr.Button(f"Generate Minutes of meeting").click(summarize, [meeting_texts], minutes)
+         gr.Button(f"Generate Minutes of meeting").click(summarize, None, minutes)
 
         # Clear button to reset inputs and outputs
-         clear_button.click(clear_all, None, [*video_inputs, *transcript_outputs, *translated_texts, *dubbed_videos, minutes, meeting_texts])
+         clear_button.click(clear_all, None, [*video_inputs, *transcript_outputs, *translated_texts, *dubbed_videos, minutes])
+
+     # Launch with .queue() to keep it running properly in Jupyter
+     demo.queue().launch(debug=True, share=True)
+
+
+ create_gradio_interface(n_participants, language_choices)
+
+ # def create_dub_from_file(
+ #     input_file_path: str,
+ #     file_format: str,
+ #     source_language: str,
+ #     target_language: str,
+ # ):
+ # # ) -> Optional[str]:
+ #     """
+ #     Dubs an audio or video file from one language to another and saves the output.
+ #     Args:
+ #         input_file_path (str): The file path of the audio or video to dub.
+ #         file_format (str): The file format of the input file.
+ #         source_language (str): The language of the input file.
+ #         target_language (str): The target language to dub into.
+ #     Returns:
+ #         Optional[str]: The file path of the dubbed file or None if operation failed.
+ #     """
+ #     if not os.path.isfile(input_file_path):
+ #         raise FileNotFoundError(f"The input file does not exist: {input_file_path}")
+
+ #     with open(input_file_path, "rb") as audio_file:
+ #         response = client.dubbing.dub_a_video_or_an_audio_file(
+ #             file=(os.path.basename(input_file_path), audio_file, file_format), # Optional file
+ #             target_lang=target_language, # The target language to dub the content into. Can be none if dubbing studio editor is enabled and running manual mode
+ #             # mode="automatic", # automatic or manual.
+ #             source_lang=source_language, # Source language
+ #             num_speakers=1, # Number of speakers to use for the dubbing.
+ #             watermark=True, # Whether to apply watermark to the output video.
+ #         )
+
+ #     # rest of the code
+ #     dubbing_id = response.dubbing_id
+ #     if wait_for_dubbing_completion(dubbing_id):
+ #         output_file_path = download_dubbed_file(dubbing_id, target_language)
+ #         return output_file_path
+ #     else:
+ #         return None
+
+
+ # # Modify the summarize function to accept and return meeting_texts
+ # def summarize(meeting_texts):
+ #     meeting_texts = ', '.join([f"{k}: {v}" for i in meeting_texts for k, v in i.items()])
+ #     meeting_date_time = str(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
+ #     # meeting_texts_str = meeting_date_time + '\n' + mt
+
+ #     minutes_of_meeting = ""
+ #     for chunk in AI71(AI71_API_KEY.strip()).chat.completions.create(
+ #         model="tiiuae/falcon-180b-chat",
+ #         messages=[
+ #             {"role": "system", "content": f"""You are an experienced Secretary who can summarize meeting discussions into minutes of meeting.
+ #             Summarize the meetings discussions provided as Speakerwise conversation.
+ #             Strictly consider only the context given in user content for summarization.
+ #             Ensure to mention the title as 'Minutes of Meeting held on {meeting_date_time}' and present the summary with better viewing format and title in bold letters."""},
+ #             {"role": "user", "content": meeting_texts},
+ #         ],
+ #         stream=True,
+ #     ):
+ #         if chunk.choices[0].delta.content:
+ #             summary = chunk.choices[0].delta.content
+ #             minutes_of_meeting += summary
+
+ #     minutes_of_meeting = minutes_of_meeting.replace('User:', '').strip()
+ #     print("minutes_of_meeting:", minutes_of_meeting)
+ #     return minutes_of_meeting
+
+
+ # # Placeholder function for speech to text conversion
+ # def speech_to_text(video):
+ #     print(video, type(video))
+ #     print('Started transcribing')
+ #     audio = AudioSegment.from_file(video)
+ #     audio.export('temp.wav', format="wav")
+
+ #     # transcript = transcriber.transcribe(video).text
+ #     # transcript = transcriber.transcribe(video).text
+ #     transcript = transcriber("temp.wav").split("'")[1].strip()
+
+ #     print('transcript:', transcript)
+ #     return transcript
+
+ # # Placeholder function for translating text
+ # def translate_text(text, source_language,target_language):
+ #     tokenizer.src_lang = source_language
+ #     encoded_ln = tokenizer(text, return_tensors="pt")
+ #     generated_tokens = translator.generate(**encoded_ln, forced_bos_token_id=tokenizer.get_lang_id(target_language))
+ #     translated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
+ #     print('translated_text:', translated_text)
+ #     return translated_text
+
+ # # Placeholder function for dubbing (text-to-speech in another language)
+ # def synthesize_speech(video, source_language,target_language):
+ #     print('Started dubbing')
+ #     dub_video = create_dub_from_file(input_file_path = video,
+ #                                      file_format = 'audio/mpeg',
+ #                                      source_language = source_language,
+ #                                      target_language = target_language)
+ #     return dub_video
+
+
+ # # Update process_speaker function to accept and return meeting_texts
+ # def process_speaker(video, speaker_idx, n_participants, meeting_texts, *language_list):
+ #     transcript = speech_to_text(video)
+
+ #     # Create outputs for each participant
+ #     outputs = []
+
+ #     def process_translation_dubbing(i):
+ #         if i != speaker_idx:
+ #             participant_language = language_codes[language_list[i]]
+ #             speaker_language = language_codes[language_list[speaker_idx]]
+ #             translated_text = translate_text(transcript, speaker_language, participant_language)
+ #             dubbed_video = synthesize_speech(video, speaker_language, participant_language)
+ #             return translated_text, dubbed_video
+ #         return None, None
+
+ #     with concurrent.futures.ThreadPoolExecutor() as executor:
+ #         futures = [executor.submit(process_translation_dubbing, i) for i in range(n_participants)]
+ #         results = [f.result() for f in futures]
+
+ #     for i, (translated_text, dubbed_video) in enumerate(results):
+ #         if i == speaker_idx:
+ #             outputs.insert(0, transcript)
+ #         else:
+ #             outputs.append(translated_text)
+ #             outputs.append(dubbed_video)
+
+ #     if speaker_idx == 0:
+ #         meeting_texts.append({f"Speaker_{speaker_idx+1}": outputs[0]})
+ #     else:
+ #         meeting_texts.append({f"Speaker_{speaker_idx+1}": outputs[1]})
+
+ #     print("meeting_texts:", meeting_texts)
+ #     print('outputs:', outputs)
+ #     outputs.append(meeting_texts)
+ #     return outputs
+
+
+ # def create_participant_row(i, language_choices):
+ #     """Creates the UI for a single participant."""
+ #     with gr.Row():
+ #         video_input = gr.Video(label=f"Participant {i+1} Video", interactive=True)
+ #         language_dropdown = gr.Dropdown(choices=language_choices, label=f"Participant {i+1} Language", value=language_choices[i])
+ #         transcript_output = gr.Textbox(label=f"Participant {i+1} Transcript")
+ #         translated_text = gr.Textbox(label="Speaker's Translated Text")
+ #         dubbed_video = gr.Video(label="Speaker's Dubbed Video")
+ #     return video_input, language_dropdown, transcript_output, translated_text, dubbed_video
+
+
+ # # Modify the Gradio interface to manage the meeting_texts between function calls
+ # def create_gradio_interface(n_participants, language_choices):
+ #     with gr.Blocks() as demo:
+ #         gr.Markdown("""# LinguaPolis: Bridging Languages, Uniting Teams Globally - Multilingual Conference Call Simulation
+ #         ## Record your video or upload your video and press the corresponding Submit button at the bottom""")
+
+ #         video_inputs = []
+ #         language_dropdowns = []
+ #         transcript_outputs = []
+ #         translated_texts = []
+ #         dubbed_videos = []
+
+ #         clear_button = gr.Button("Clear All")
+ #         meeting_texts = gr.State([]) # Initialize meeting_texts as a Gradio State
+
+ #         # Create a row for each participant
+ #         for i in range(n_participants):
+ #             video_input, language_dropdown, transcript_output, translated_text, dubbed_video = create_participant_row(i, language_choices)
+ #             video_inputs.append(video_input)
+ #             language_dropdowns.append(language_dropdown)
+ #             transcript_outputs.append(transcript_output)
+ #             translated_texts.append(translated_text)
+ #             dubbed_videos.append(dubbed_video)
+
+ #         # Create dynamic processing buttons for each participant
+ #         for i in range(n_participants):
+ #             gr.Button(f"Submit Speaker {i+1}'s Speech").click(
+ #                 process_speaker,
+ #                 [video_inputs[i], gr.State(i), gr.State(n_participants), meeting_texts] + [language_dropdowns[j] for j in range(n_participants)],
+ #                 [transcript_outputs[i]] + [k for j in zip(translated_texts[:i]+translated_texts[i+1:], dubbed_videos[:i]+dubbed_videos[i+1:]) for k in j] + [meeting_texts]
+ #             )
+
+ #         minutes = gr.Textbox(label="Minutes of Meeting")
+ #         gr.Button(f"Generate Minutes of meeting").click(summarize, [meeting_texts], minutes)
+
+ #         # Clear button to reset inputs and outputs
+ #         clear_button.click(clear_all, None, [*video_inputs, *transcript_outputs, *translated_texts, *dubbed_videos, minutes, meeting_texts])
 
- demo.launch(debug=True, share=True)
- create_gradio_interface(4, language_choices)
+ # demo.launch(debug=True, share=True)
+ # create_gradio_interface(4, language_choices)
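
For contrast with the `global meeting_texts` approach this commit settles on, the commented-out alternative kept at the bottom of the file threads the transcript list through `gr.State`. A minimal, runnable sketch of that round-trip pattern (a hypothetical demo, not this app's UI): the state value is listed among the inputs and returned among the outputs, so each click sees the previous click's appends without any global.

```python
import gradio as gr

def add_note(note, notes):
    notes = notes + [note]          # copy-and-append the incoming state value
    return ", ".join(notes), notes  # the last output writes back to gr.State

with gr.Blocks() as demo:
    notes_state = gr.State([])      # per-session accumulator
    note_in = gr.Textbox(label="Note")
    all_notes = gr.Textbox(label="All notes so far")
    gr.Button("Add").click(add_note, [note_in, notes_state],
                           [all_notes, notes_state])

# demo.launch()  # uncomment to try locally
```

A practical difference worth noting: a `gr.State` accumulator is scoped to one browser session, while the module-level list used by the committed version is shared by every visitor to the Space.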