Update app.py
app.py CHANGED
@@ -186,93 +186,100 @@ def synthesize_speech(video, source_language,target_language):
                                     target_language = target_language)
    return dub_video

-# This function handles the processing when any participant speaks
-def process_speaker(video, speaker_idx, n_participants, *language_list):
-    transcript = speech_to_text(video)
-
-    # Create outputs for each participant
-    outputs = []
-    global meeting_texts
-    def process_translation_dubbing(i):
-        if i != speaker_idx:
-            participant_language = language_codes[language_list[i]]
-            speaker_language = language_codes[language_list[speaker_idx]]
-            translated_text = translate_text(transcript, speaker_language, participant_language)
-            dubbed_video = synthesize_speech(video, speaker_language, participant_language)
-            return translated_text, dubbed_video
-        return None, None
-
-    with concurrent.futures.ThreadPoolExecutor() as executor:
-        futures = [executor.submit(process_translation_dubbing, i) for i in range(n_participants)]
-        results = [f.result() for f in futures]
-
-    for i, (translated_text, dubbed_video) in enumerate(results):
-        if i == speaker_idx:
-            outputs.insert(0, transcript)
-        else:
-            outputs.append(translated_text)
-            outputs.append(dubbed_video)
-    if speaker_idx == 0:
-        meeting_texts.append({f"Speaker_{speaker_idx+1}":outputs[0]})
-    else:
-        meeting_texts.append({f"Speaker_{speaker_idx+1}":outputs[1]})
-
-    print(len(outputs))
-    print(outputs)
-    print('meeting_texts: ',meeting_texts)
-    return outputs
-
-def create_participant_row(i, language_choices):
-    """Creates the UI for a single participant."""
-    with gr.Row():
-        video_input = gr.Video(label=f"Participant {i+1} Video", interactive=True)
-        language_dropdown = gr.Dropdown(choices=language_choices, label=f"Participant {i+1} Language", value=language_choices[i])
-        transcript_output = gr.Textbox(label=f"Participant {i+1} Transcript")
-        translated_text = gr.Textbox(label="Speaker's Translated Text")
-        dubbed_video = gr.Video(label="Speaker's Dubbed Video")
-        return video_input, language_dropdown, transcript_output, translated_text, dubbed_video
-
-# Main dynamic Gradio interface
-def create_gradio_interface(n_participants, language_choices):
-    with gr.Blocks() as demo:
-        gr.Markdown("""# LinguaPolis: Bridging Languages, Uniting Teams Globally - Multilingual Conference Call Simulation
-## Record your video or upload your video and press the corresponding Submit button at the bottom""")
-
-        video_inputs = []
-        language_dropdowns = []
-        transcript_outputs = []
-        translated_texts = []
-        dubbed_videos = []
-
-        clear_button = gr.Button("Clear All")
-
-        # Create a row for each participant
-        for i in range(n_participants):
-            video_input, language_dropdown, transcript_output, translated_text, dubbed_video = create_participant_row(i, language_choices)
-            video_inputs.append(video_input)
-            language_dropdowns.append(language_dropdown)
-            transcript_outputs.append(transcript_output)
-            translated_texts.append(translated_text)
-            dubbed_videos.append(dubbed_video)
-
-        # Create dynamic processing buttons for each participant
-        for i in range(n_participants):
-            gr.Button(f"Submit Speaker {i+1}'s Speech").click(
-                process_speaker,
-                [video_inputs[i], gr.State(i), gr.State(n_participants)] + [language_dropdowns[j] for j in range(n_participants)],
-                [transcript_outputs[i]] + [k for j in zip(translated_texts[:i]+translated_texts[i+1:], dubbed_videos[:i]+dubbed_videos[i+1:]) for k in j]
-            )
-        minutes = gr.Textbox(label="Minutes of Meeting")
-        gr.Button(f"Generate Minutes of meeting").click(summarize, None, minutes)
-
-        # Clear button to reset inputs and outputs
-        clear_button.click(clear_all, None, [*video_inputs, *transcript_outputs, *translated_texts, *dubbed_videos, minutes])
-
-    # Launch with .queue() to keep it running properly in Jupyter
-    demo.queue().launch(debug=True, share=True)
-
-create_gradio_interface(n_participants, language_choices)
+# # This function handles the processing when any participant speaks
+# def process_speaker(video, speaker_idx, n_participants, *language_list):
+#     transcript = speech_to_text(video)
+
+#     # Create outputs for each participant
+#     outputs = []
+#     global meeting_texts
+#     def process_translation_dubbing(i):
+#         if i != speaker_idx:
+#             participant_language = language_codes[language_list[i]]
+#             speaker_language = language_codes[language_list[speaker_idx]]
+#             translated_text = translate_text(transcript, speaker_language, participant_language)
+#             dubbed_video = synthesize_speech(video, speaker_language, participant_language)
+#             return translated_text, dubbed_video
+#         return None, None
+
+#     with concurrent.futures.ThreadPoolExecutor() as executor:
+#         futures = [executor.submit(process_translation_dubbing, i) for i in range(n_participants)]
+#         results = [f.result() for f in futures]
+
+#     for i, (translated_text, dubbed_video) in enumerate(results):
+#         if i == speaker_idx:
+#             outputs.insert(0, transcript)
+#         else:
+#             outputs.append(translated_text)
+#             outputs.append(dubbed_video)
+#     if speaker_idx == 0:
+#         meeting_texts.append({f"Speaker_{speaker_idx+1}":outputs[0]})
+#     else:
+#         meeting_texts.append({f"Speaker_{speaker_idx+1}":outputs[1]})
+
+#     print(len(outputs))
+#     print(outputs)
+#     print('meeting_texts: ',meeting_texts)
+#     return outputs
+
+# def create_participant_row(i, language_choices):
+#     """Creates the UI for a single participant."""
+#     with gr.Row():
+#         video_input = gr.Video(label=f"Participant {i+1} Video", interactive=True)
+#         language_dropdown = gr.Dropdown(choices=language_choices, label=f"Participant {i+1} Language", value=language_choices[i])
+#         transcript_output = gr.Textbox(label=f"Participant {i+1} Transcript")
+#         translated_text = gr.Textbox(label="Speaker's Translated Text")
+#         dubbed_video = gr.Video(label="Speaker's Dubbed Video")
+#         return video_input, language_dropdown, transcript_output, translated_text, dubbed_video
+
+# # Main dynamic Gradio interface
+# def create_gradio_interface(n_participants, language_choices):
+#     with gr.Blocks() as demo:
+#         gr.Markdown("""# LinguaPolis: Bridging Languages, Uniting Teams Globally - Multilingual Conference Call Simulation
+# ## Record your video or upload your video and press the corresponding Submit button at the bottom""")
+
+#         video_inputs = []
+#         language_dropdowns = []
+#         transcript_outputs = []
+#         translated_texts = []
+#         dubbed_videos = []
+
+#         clear_button = gr.Button("Clear All")
+
+#         # Create a row for each participant
+#         for i in range(n_participants):
+#             video_input, language_dropdown, transcript_output, translated_text, dubbed_video = create_participant_row(i, language_choices)
+#             video_inputs.append(video_input)
+#             language_dropdowns.append(language_dropdown)
+#             transcript_outputs.append(transcript_output)
+#             translated_texts.append(translated_text)
+#             dubbed_videos.append(dubbed_video)
+
+#         # Create dynamic processing buttons for each participant
+#         for i in range(n_participants):
+#             gr.Button(f"Submit Speaker {i+1}'s Speech").click(
+#                 process_speaker,
+#                 [video_inputs[i], gr.State(i), gr.State(n_participants)] + [language_dropdowns[j] for j in range(n_participants)],
+#                 [transcript_outputs[i]] + [k for j in zip(translated_texts[:i]+translated_texts[i+1:], dubbed_videos[:i]+dubbed_videos[i+1:]) for k in j]
+#             )
+#         minutes = gr.Textbox(label="Minutes of Meeting")
+#         gr.Button(f"Generate Minutes of meeting").click(summarize, None, minutes)
+
+#         # Clear button to reset inputs and outputs
+#         clear_button.click(clear_all, None, [*video_inputs, *transcript_outputs, *translated_texts, *dubbed_videos, minutes])
+
+#     # Launch with .queue() to keep it running properly in Jupyter
+#     demo.queue().launch(debug=True, share=True)
+
+# create_gradio_interface(n_participants, language_choices)

# def create_dub_from_file(
#     input_file_path: str,
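Both the retired process_speaker above and its replacement later in this diff fan the per-participant translation and dubbing work out to a concurrent.futures.ThreadPoolExecutor and then read the futures back in submission order, so the results stay aligned with participant indices. A minimal, self-contained sketch of that pattern, with a hypothetical translate_and_dub stub standing in for the app's translate_text and synthesize_speech calls:

import concurrent.futures

def translate_and_dub(i, speaker_idx):
    # Hypothetical stub: the real app calls translate_text() and
    # synthesize_speech() here for every participant except the speaker.
    if i == speaker_idx:
        return None, None
    return f"translation for participant {i}", f"dubbed video for participant {i}"

def fan_out(speaker_idx, n_participants):
    # One job per participant; reading futures in submission order keeps
    # results[i] matched to participant i.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [executor.submit(translate_and_dub, i, speaker_idx)
                   for i in range(n_participants)]
        return [f.result() for f in futures]

print(fan_out(speaker_idx=0, n_participants=3))

Threads are a reasonable fit here because the translation and speech-synthesis steps are presumably network-bound API calls, so they overlap well despite the GIL.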
@@ -373,92 +380,95 @@ create_gradio_interface(n_participants, language_choices)
# return dub_video

+# Update process_speaker function to accept and return meeting_texts
+def process_speaker(video, speaker_idx, n_participants, meeting_texts, *language_list):
+    transcript = speech_to_text(video)
+
+    # Create outputs for each participant
+    outputs = []
+
+    def process_translation_dubbing(i):
+        if i != speaker_idx:
+            participant_language = language_codes[language_list[i]]
+            speaker_language = language_codes[language_list[speaker_idx]]
+            translated_text = translate_text(transcript, speaker_language, participant_language)
+            dubbed_video = synthesize_speech(video, speaker_language, participant_language)
+            return translated_text, dubbed_video
+        return None, None
+
+    with concurrent.futures.ThreadPoolExecutor() as executor:
+        futures = [executor.submit(process_translation_dubbing, i) for i in range(n_participants)]
+        results = [f.result() for f in futures]
+
+    for i, (translated_text, dubbed_video) in enumerate(results):
+        if i == speaker_idx:
+            outputs.insert(0, transcript)
+        else:
+            outputs.append(translated_text)
+            outputs.append(dubbed_video)
+
+    if speaker_idx == 0:
+        meeting_texts.append({f"Speaker_{speaker_idx+1}": outputs[0]})
+    else:
+        meeting_texts.append({f"Speaker_{speaker_idx+1}": outputs[1]})
+
+    print(len(outputs))
+    print(outputs)
+    print("meeting_texts:", meeting_texts)
+    print('outputs:', outputs)
+    outputs.append(meeting_texts)
+    print(len(outputs))
+    return outputs
+
+
+def create_participant_row(i, language_choices):
+    """Creates the UI for a single participant."""
+    with gr.Row():
+        video_input = gr.Video(label=f"Participant {i+1} Video", interactive=True)
+        language_dropdown = gr.Dropdown(choices=language_choices, label=f"Participant {i+1} Language", value=language_choices[i])
+        transcript_output = gr.Textbox(label=f"Participant {i+1} Transcript")
+        translated_text = gr.Textbox(label="Speaker's Translated Text")
+        dubbed_video = gr.Video(label="Speaker's Dubbed Video")
+        return video_input, language_dropdown, transcript_output, translated_text, dubbed_video
+
+
+# Modify the Gradio interface to manage the meeting_texts between function calls
+def create_gradio_interface(n_participants, language_choices):
+    with gr.Blocks() as demo:
+        gr.Markdown("""# LinguaPolis: Bridging Languages, Uniting Teams Globally - Multilingual Conference Call Simulation
+## Record your video or upload your video and press the corresponding Submit button at the bottom""")
+
+        video_inputs = []
+        language_dropdowns = []
+        transcript_outputs = []
+        translated_texts = []
+        dubbed_videos = []
+
+        clear_button = gr.Button("Clear All")
+        meeting_texts = gr.State([])  # Initialize meeting_texts as a Gradio State
+
+        # Create a row for each participant
+        for i in range(n_participants):
+            video_input, language_dropdown, transcript_output, translated_text, dubbed_video = create_participant_row(i, language_choices)
+            video_inputs.append(video_input)
+            language_dropdowns.append(language_dropdown)
+            transcript_outputs.append(transcript_output)
+            translated_texts.append(translated_text)
+            dubbed_videos.append(dubbed_video)
+
+        # Create dynamic processing buttons for each participant
+        for i in range(n_participants):
+            gr.Button(f"Submit Speaker {i+1}'s Speech").click(
+                process_speaker,
+                [video_inputs[i], gr.State(i), gr.State(n_participants), meeting_texts] + [language_dropdowns[j] for j in range(n_participants)],
+                [transcript_outputs[i]] + [k for j in zip(translated_texts[:i]+translated_texts[i+1:], dubbed_videos[:i]+dubbed_videos[i+1:]) for k in j] + [meeting_texts]
+            )
+
+        minutes = gr.Textbox(label="Minutes of Meeting")
+        gr.Button(f"Generate Minutes of meeting").click(summarize, [meeting_texts], minutes)
+
+        # Clear button to reset inputs and outputs
+        clear_button.click(clear_all, None, [*video_inputs, *transcript_outputs, *translated_texts, *dubbed_videos, minutes, meeting_texts])
+
+    demo.launch(debug=True, share=True)
+
+create_gradio_interface(4, language_choices)
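The core change in this second hunk is replacing the module-level global meeting_texts with a gr.State([]) component: the state is passed to process_speaker as an extra input, the function appends the updated list as its final return value, and the same component is listed at the end of the outputs, so Gradio writes the new value back after every click. A minimal sketch of that read-update-write round trip, using hypothetical component names rather than the app's full wiring:

import gradio as gr

def add_line(new_line, history):
    # `history` arrives as the current value of the gr.State input;
    # returning it among the outputs writes the updated value back.
    history = history + [new_line]
    return "\n".join(history), history

with gr.Blocks() as demo:
    history = gr.State([])  # per-session list, starts empty
    line = gr.Textbox(label="New line")
    log = gr.Textbox(label="Meeting log")
    gr.Button("Add").click(add_line, [line, history], [log, history])

demo.launch()

Unlike a module-level global, a gr.State value is tracked per user session, so two people using the shared demo accumulate separate meeting logs; it also makes the reset explicit, since meeting_texts now appears in the clear button's output list.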
|