Spaces:

vsrinivas
/

Multi_Language_Video_Conference

Sleeping

App Files Community

vsrinivas committed on Oct 14, 2024

Commit

034d51f

verified ·

1 Parent(s): 5d4e153

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -14

app.py CHANGED Viewed

@@ -33,12 +33,12 @@ language_codes = {"English":"en", "Hindi":"hi", "Portuguese":"pt", "Chinese":"zh
 "Ukrainian":"uk", "Greek":"el", "Czech":"cs", "Danish":"da", "Finnish":"fi",
 "Bulgarian":"bg", "Croatian":"hr", "Slovak":"sk"}
-meeting_texts = []
 n_participants = 4 # This can be adjusted based on the number of people in the call
 language_choices = ["English", "Polish", "Hindi", "Arabic"]
 def clear_all():
-    global meeting_texts
     meeting_texts = []  # Reset meeting texts
     return [None] * (n_participants * 4 + 1)  # Reset outputs of transcripts, translated texts, and dubbed videos
@@ -136,20 +136,17 @@ def create_dub_from_file(
         return None
-def summarize(meeting_texts=meeting_texts):
     mt = ', '.join([f"{k}: {v}" for i in meeting_texts for k, v in i.items()])
     meeting_date_time = str(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
     meeting_texts = meeting_date_time + '\n' + mt
-    # meeting_conversation_processed ='\n'.join(mt)
-    # print("M:", session_conversation_processed)
     minutes_of_meeting = ""
     for chunk in AI71(AI71_API_KEY.strip()).chat.completions.create(
         model="tiiuae/falcon-180b-chat",
         messages=[
             {"role": "system", "content": f"""You are an expereiced Secretary who can summarize meeting discussions into minutes of meeting.
-            Summarize the meetings discussions provided as Speakerwise conversation.
             Strictly consider only the context given in user content {meeting_texts} for summarization.
             Ensure to mention the title as 'Minutes of Meeting held on {meeting_date_time} and present the summary with better viewing format and title in bold letters"""},
             {"role": "user", "content": meeting_texts},
@@ -198,12 +195,12 @@ def synthesize_speech(video, source_language,target_language):
     return dub_video
 # This function handles the processing when any participant speaks
-def process_speaker(video, speaker_idx, n_participants, *language_list):
     transcript = speech_to_text(video)
     # Create outputs for each participant
     outputs = []
-    global meeting_texts
     def process_translation_dubbing(i):
         if i != speaker_idx:
             participant_language = language_codes[language_list[i]]
@@ -223,6 +220,7 @@ def process_speaker(video, speaker_idx, n_participants, *language_list):
         else:
             outputs.append(translated_text)
             outputs.append(dubbed_video)
     if speaker_idx == 0:
         meeting_texts.append({f"Speaker_{speaker_idx+1}":outputs[0]})
     else:
@@ -230,9 +228,11 @@ def process_speaker(video, speaker_idx, n_participants, *language_list):
     print(len(outputs))
     print(outputs)
     print('meeting_texts: ',meeting_texts)
     return outputs
 def create_participant_row(i, language_choices):
     """Creates the UI for a single participant."""
     with gr.Row():
@@ -243,12 +243,15 @@ def create_participant_row(i, language_choices):
         dubbed_video = gr.Video(label="Speaker's Dubbed Video")
         return video_input, language_dropdown, transcript_output, translated_text, dubbed_video
 # Main dynamic Gradio interface
 def create_gradio_interface(n_participants, language_choices):
     with gr.Blocks() as demo:
         gr.Markdown("""# LinguaPolis: Bridging Languages, Uniting Teams Globally - Multilingual Conference Call Simulation
         ## Record your video or upload your video and press the corresponding Submit button at the bottom""")
         video_inputs = []
         language_dropdowns = []
         transcript_outputs = []
@@ -256,7 +259,7 @@ def create_gradio_interface(n_participants, language_choices):
         dubbed_videos = []
         clear_button = gr.Button("Clear All")
         # Create a row for each participant
         for i in range(n_participants):
             video_input, language_dropdown, transcript_output, translated_text, dubbed_video = create_participant_row(i, language_choices)
@@ -270,12 +273,13 @@ def create_gradio_interface(n_participants, language_choices):
         for i in range(n_participants):
             gr.Button(f"Submit Speaker {i+1}'s Speech").click(
                 process_speaker,
-                [video_inputs[i], gr.State(i), gr.State(n_participants)] + [language_dropdowns[j] for j in range(n_participants)],
-                [transcript_outputs[i]] + [k for j in zip(translated_texts[:i]+translated_texts[i+1:], dubbed_videos[:i]+dubbed_videos[i+1:]) for k in j]
            )
         minutes = gr.Textbox(label="Minutes of Meeting")
-        gr.Button(f"Generate Minutes of meeting").click(summarize, None, minutes)
         # Clear button to reset inputs and outputs
         clear_button.click(clear_all, None, [*video_inputs, *transcript_outputs, *translated_texts, *dubbed_videos, minutes])

 "Ukrainian":"uk", "Greek":"el", "Czech":"cs", "Danish":"da", "Finnish":"fi",
 "Bulgarian":"bg", "Croatian":"hr", "Slovak":"sk"}
+# meeting_texts = []
 n_participants = 4 # This can be adjusted based on the number of people in the call
 language_choices = ["English", "Polish", "Hindi", "Arabic"]
 def clear_all():
+    # global meeting_texts
     meeting_texts = []  # Reset meeting texts
     return [None] * (n_participants * 4 + 1)  # Reset outputs of transcripts, translated texts, and dubbed videos
         return None
+def summarize(meeting_texts):
     mt = ', '.join([f"{k}: {v}" for i in meeting_texts for k, v in i.items()])
     meeting_date_time = str(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
     meeting_texts = meeting_date_time + '\n' + mt
     minutes_of_meeting = ""
     for chunk in AI71(AI71_API_KEY.strip()).chat.completions.create(
         model="tiiuae/falcon-180b-chat",
         messages=[
             {"role": "system", "content": f"""You are an expereiced Secretary who can summarize meeting discussions into minutes of meeting.
+            Summarize the meetings discussions provided as Speakerwise conversation.
             Strictly consider only the context given in user content {meeting_texts} for summarization.
             Ensure to mention the title as 'Minutes of Meeting held on {meeting_date_time} and present the summary with better viewing format and title in bold letters"""},
             {"role": "user", "content": meeting_texts},
     return dub_video
 # This function handles the processing when any participant speaks
+def process_speaker(video, speaker_idx, n_participants, meeting_texts, *language_list):
     transcript = speech_to_text(video)
     # Create outputs for each participant
     outputs = []
+    # global meeting_texts
     def process_translation_dubbing(i):
         if i != speaker_idx:
             participant_language = language_codes[language_list[i]]
         else:
             outputs.append(translated_text)
             outputs.append(dubbed_video)
     if speaker_idx == 0:
         meeting_texts.append({f"Speaker_{speaker_idx+1}":outputs[0]})
     else:
     print(len(outputs))
     print(outputs)
+    outputs.extend(meeting_texts)
     print('meeting_texts: ',meeting_texts)
     return outputs
 def create_participant_row(i, language_choices):
     """Creates the UI for a single participant."""
     with gr.Row():
         dubbed_video = gr.Video(label="Speaker's Dubbed Video")
         return video_input, language_dropdown, transcript_output, translated_text, dubbed_video
 # Main dynamic Gradio interface
 def create_gradio_interface(n_participants, language_choices):
     with gr.Blocks() as demo:
         gr.Markdown("""# LinguaPolis: Bridging Languages, Uniting Teams Globally - Multilingual Conference Call Simulation
         ## Record your video or upload your video and press the corresponding Submit button at the bottom""")
+        meeting_texts = []
         video_inputs = []
         language_dropdowns = []
         transcript_outputs = []
         dubbed_videos = []
         clear_button = gr.Button("Clear All")
         # Create a row for each participant
         for i in range(n_participants):
             video_input, language_dropdown, transcript_output, translated_text, dubbed_video = create_participant_row(i, language_choices)
         for i in range(n_participants):
             gr.Button(f"Submit Speaker {i+1}'s Speech").click(
                 process_speaker,
+                # [video_inputs[i], gr.State(i), gr.State(n_participants)] + [language_dropdowns[j] for j in range(n_participants)],
+                [video_inputs[i], gr.State(i), gr.State(n_participants)] + [gr.State(meeting_texts)] + [language_dropdowns[j] for j in range(n_participants)],
+                [transcript_outputs[i]] + [k for j in zip(translated_texts[:i]+translated_texts[i+1:], dubbed_videos[:i]+dubbed_videos[i+1:]) for k in j] + [gr.State(meeting_texts)]
            )
         minutes = gr.Textbox(label="Minutes of Meeting")
+        gr.Button(f"Generate Minutes of meeting").click(summarize, meeting_texts, minutes)
         # Clear button to reset inputs and outputs
         clear_button.click(clear_all, None, [*video_inputs, *transcript_outputs, *translated_texts, *dubbed_videos, minutes])