vsrinivas committed on
Commit 874ec29 · verified · 1 Parent(s): fa6dd7e

Update app.py

Files changed (1)
  1. app.py +215 -38
app.py CHANGED
@@ -86,6 +86,194 @@ def download_dubbed_file(dubbing_id: str, language_code: str) -> str:
86
 
87
  return file_path
88
89
  def create_dub_from_file(
90
  input_file_path: str,
91
  file_format: str,
@@ -125,22 +313,20 @@ def create_dub_from_file(
125
  return None
126
 
127
 
128
- def summarize(meeting_texts=meeting_texts):
 
129
  meeting_texts = ', '.join([f"{k}: {v}" for i in meeting_texts for k, v in i.items()])
130
  meeting_date_time = str(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
131
- # meeting_texts = meeting_date_time + '\n' + meeting_texts
132
-
133
- # meeting_conversation_processed ='\n'.join(mt)
134
- # print("M:", session_conversation_processed)
135
 
136
  minutes_of_meeting = ""
137
  for chunk in AI71(AI71_API_KEY.strip()).chat.completions.create(
138
  model="tiiuae/falcon-180b-chat",
139
  messages=[
140
- {"role": "system", "content": f"""You are an expereiced Secretary who can summarize meeting discussions into minutes of meeting.
141
  Summarize the meetings discussions provided as Speakerwise conversation.
142
- Strictly consider only the context given in user content {meeting_texts} for summarization.
143
- Ensure to mention the title as 'Minutes of Meeting held on {meeting_date_time} and present the summary with better viewing format and title in bold letters"""},
144
  {"role": "user", "content": meeting_texts},
145
  ],
146
  stream=True,
@@ -148,8 +334,8 @@ def summarize(meeting_texts=meeting_texts):
148
  if chunk.choices[0].delta.content:
149
  summary = chunk.choices[0].delta.content
150
  minutes_of_meeting += summary
 
151
  minutes_of_meeting = minutes_of_meeting.replace('User:', '').strip()
152
- print("\n")
153
  print("minutes_of_meeting:", minutes_of_meeting)
154
  return minutes_of_meeting
155
 
@@ -186,13 +372,14 @@ def synthesize_speech(video, source_language,target_language):
186
  target_language = target_language)
187
  return dub_video
188
 
189
- # This function handles the processing when any participant speaks
190
- def process_speaker(video, speaker_idx, n_participants, *language_list):
 
191
  transcript = speech_to_text(video)
192
 
193
  # Create outputs for each participant
194
  outputs = []
195
- global meeting_texts
196
  def process_translation_dubbing(i):
197
  if i != speaker_idx:
198
  participant_language = language_codes[language_list[i]]
@@ -212,27 +399,18 @@ def process_speaker(video, speaker_idx, n_participants, *language_list):
212
  else:
213
  outputs.append(translated_text)
214
  outputs.append(dubbed_video)
 
215
  if speaker_idx == 0:
216
- meeting_texts.append({f"Speaker_{speaker_idx+1}":outputs[0]})
217
  else:
218
- meeting_texts.append({f"Speaker_{speaker_idx+1}":outputs[1]})
219
 
220
- print(len(outputs))
221
- print(outputs)
222
- print('meeting_texts: ',meeting_texts)
223
  return outputs
224
 
225
- def create_participant_row(i, language_choices):
226
- """Creates the UI for a single participant."""
227
- with gr.Row():
228
- video_input = gr.Video(label=f"Participant {i+1} Video", interactive=True)
229
- language_dropdown = gr.Dropdown(choices=language_choices, label=f"Participant {i+1} Language", value=language_choices[i])
230
- transcript_output = gr.Textbox(label=f"Participant {i+1} Transcript")
231
- translated_text = gr.Textbox(label="Speaker's Translated Text")
232
- dubbed_video = gr.Video(label="Speaker's Dubbed Video")
233
- return video_input, language_dropdown, transcript_output, translated_text, dubbed_video
234
-
235
- # Main dynamic Gradio interface
236
  def create_gradio_interface(n_participants, language_choices):
237
  with gr.Blocks() as demo:
238
  gr.Markdown("""# LinguaPolis: Bridging Languages, Uniting Teams Globally - Multilingual Conference Call Simulation
@@ -245,6 +423,7 @@ def create_gradio_interface(n_participants, language_choices):
245
  dubbed_videos = []
246
 
247
  clear_button = gr.Button("Clear All")
 
248
 
249
  # Create a row for each participant
250
  for i in range(n_participants):
@@ -259,17 +438,15 @@ def create_gradio_interface(n_participants, language_choices):
259
  for i in range(n_participants):
260
  gr.Button(f"Submit Speaker {i+1}'s Speech").click(
261
  process_speaker,
262
- [video_inputs[i], gr.State(i), gr.State(n_participants)] + [language_dropdowns[j] for j in range(n_participants)],
263
- [transcript_outputs[i]] + [k for j in zip(translated_texts[:i]+translated_texts[i+1:], dubbed_videos[:i]+dubbed_videos[i+1:]) for k in j]
264
- )
 
265
  minutes = gr.Textbox(label="Minutes of Meeting")
266
- gr.Button(f"Generate Minutes of meeting").click(summarize, None, minutes)
267
 
268
  # Clear button to reset inputs and outputs
269
- clear_button.click(clear_all, None, [*video_inputs, *transcript_outputs, *translated_texts, *dubbed_videos, minutes])
270
-
271
- # Launch with .queue() to keep it running properly in Jupyter
272
- demo.queue().launch(debug=True, share=True)
273
-
274
 
275
- create_gradio_interface(n_participants, language_choices)
 
 
86
 
87
  return file_path
88
 
89
+ # def create_dub_from_file(
90
+ # input_file_path: str,
91
+ # file_format: str,
92
+ # source_language: str,
93
+ # target_language: str,
94
+ # ):
95
+ # # ) -> Optional[str]:
96
+ # """
97
+ # Dubs an audio or video file from one language to another and saves the output.
98
+ # Args:
99
+ # input_file_path (str): The file path of the audio or video to dub.
100
+ # file_format (str): The file format of the input file.
101
+ # source_language (str): The language of the input file.
102
+ # target_language (str): The target language to dub into.
103
+ # Returns:
104
+ # Optional[str]: The file path of the dubbed file or None if operation failed.
105
+ # """
106
+ # if not os.path.isfile(input_file_path):
107
+ # raise FileNotFoundError(f"The input file does not exist: {input_file_path}")
108
+
109
+ # with open(input_file_path, "rb") as audio_file:
110
+ # response = client.dubbing.dub_a_video_or_an_audio_file(
111
+ # file=(os.path.basename(input_file_path), audio_file, file_format), # Optional file
112
+ # target_lang=target_language, # The target language to dub the content into. Can be none if dubbing studio editor is enabled and running manual mode
113
+ # # mode="automatic", # automatic or manual.
114
+ # source_lang=source_language, # Source language
115
+ # num_speakers=1, # Number of speakers to use for the dubbing.
116
+ # watermark=True, # Whether to apply watermark to the output video.
117
+ # )
118
+
119
+ # # rest of the code
120
+ # dubbing_id = response.dubbing_id
121
+ # if wait_for_dubbing_completion(dubbing_id):
122
+ # output_file_path = download_dubbed_file(dubbing_id, target_language)
123
+ # return output_file_path
124
+ # else:
125
+ # return None
126
+
127
+
128
+ # def summarize(meeting_texts=meeting_texts):
129
+ # meeting_texts = ', '.join([f"{k}: {v}" for i in meeting_texts for k, v in i.items()])
130
+ # meeting_date_time = str(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
131
+ # # meeting_texts = meeting_date_time + '\n' + meeting_texts
132
+
133
+ # # meeting_conversation_processed ='\n'.join(mt)
134
+ # # print("M:", session_conversation_processed)
135
+
136
+ # minutes_of_meeting = ""
137
+ # for chunk in AI71(AI71_API_KEY.strip()).chat.completions.create(
138
+ # model="tiiuae/falcon-180b-chat",
139
+ # messages=[
140
+ # {"role": "system", "content": f"""You are an expereiced Secretary who can summarize meeting discussions into minutes of meeting.
141
+ # Summarize the meetings discussions provided as Speakerwise conversation.
142
+ # Strictly consider only the context given in user content {meeting_texts} for summarization.
143
+ # Ensure to mention the title as 'Minutes of Meeting held on {meeting_date_time} and present the summary with better viewing format and title in bold letters"""},
144
+ # {"role": "user", "content": meeting_texts},
145
+ # ],
146
+ # stream=True,
147
+ # ):
148
+ # if chunk.choices[0].delta.content:
149
+ # summary = chunk.choices[0].delta.content
150
+ # minutes_of_meeting += summary
151
+ # minutes_of_meeting = minutes_of_meeting.replace('User:', '').strip()
152
+ # print("\n")
153
+ # print("minutes_of_meeting:", minutes_of_meeting)
154
+ # return minutes_of_meeting
155
+
156
+
157
+ # # Placeholder function for speech to text conversion
158
+ # def speech_to_text(video):
159
+ # print(video, type(video))
160
+ # print('Started transcribing')
161
+ # audio = AudioSegment.from_file(video)
162
+ # audio.export('temp.wav', format="wav")
163
+
164
+ # # transcript = transcriber.transcribe(video).text
165
+ # # transcript = transcriber.transcribe(video).text
166
+ # transcript = transcriber("temp.wav").split("'")[1].strip()
167
+
168
+ # print('transcript:', transcript)
169
+ # return transcript
170
+
171
+ # # Placeholder function for translating text
172
+ # def translate_text(text, source_language,target_language):
173
+ # tokenizer.src_lang = source_language
174
+ # encoded_ln = tokenizer(text, return_tensors="pt")
175
+ # generated_tokens = translator.generate(**encoded_ln, forced_bos_token_id=tokenizer.get_lang_id(target_language))
176
+ # translated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
177
+ # print('translated_text:', translated_text)
178
+ # return translated_text
179
+
180
+ # # Placeholder function for dubbing (text-to-speech in another language)
181
+ # def synthesize_speech(video, source_language,target_language):
182
+ # print('Started dubbing')
183
+ # dub_video = create_dub_from_file(input_file_path = video,
184
+ # file_format = 'audio/mpeg',
185
+ # source_language = source_language,
186
+ # target_language = target_language)
187
+ # return dub_video
188
+
189
+ # # This function handles the processing when any participant speaks
190
+ # def process_speaker(video, speaker_idx, n_participants, *language_list):
191
+ # transcript = speech_to_text(video)
192
+
193
+ # # Create outputs for each participant
194
+ # outputs = []
195
+ # global meeting_texts
196
+ # def process_translation_dubbing(i):
197
+ # if i != speaker_idx:
198
+ # participant_language = language_codes[language_list[i]]
199
+ # speaker_language = language_codes[language_list[speaker_idx]]
200
+ # translated_text = translate_text(transcript, speaker_language, participant_language)
201
+ # dubbed_video = synthesize_speech(video, speaker_language, participant_language)
202
+ # return translated_text, dubbed_video
203
+ # return None, None
204
+
205
+ # with concurrent.futures.ThreadPoolExecutor() as executor:
206
+ # futures = [executor.submit(process_translation_dubbing, i) for i in range(n_participants)]
207
+ # results = [f.result() for f in futures]
208
+
209
+ # for i, (translated_text, dubbed_video) in enumerate(results):
210
+ # if i == speaker_idx:
211
+ # outputs.insert(0, transcript)
212
+ # else:
213
+ # outputs.append(translated_text)
214
+ # outputs.append(dubbed_video)
215
+ # if speaker_idx == 0:
216
+ # meeting_texts.append({f"Speaker_{speaker_idx+1}":outputs[0]})
217
+ # else:
218
+ # meeting_texts.append({f"Speaker_{speaker_idx+1}":outputs[1]})
219
+
220
+ # print(len(outputs))
221
+ # print(outputs)
222
+ # print('meeting_texts: ',meeting_texts)
223
+ # return outputs
224
+
225
+ # def create_participant_row(i, language_choices):
226
+ # """Creates the UI for a single participant."""
227
+ # with gr.Row():
228
+ # video_input = gr.Video(label=f"Participant {i+1} Video", interactive=True)
229
+ # language_dropdown = gr.Dropdown(choices=language_choices, label=f"Participant {i+1} Language", value=language_choices[i])
230
+ # transcript_output = gr.Textbox(label=f"Participant {i+1} Transcript")
231
+ # translated_text = gr.Textbox(label="Speaker's Translated Text")
232
+ # dubbed_video = gr.Video(label="Speaker's Dubbed Video")
233
+ # return video_input, language_dropdown, transcript_output, translated_text, dubbed_video
234
+
235
+ # # Main dynamic Gradio interface
236
+ # def create_gradio_interface(n_participants, language_choices):
237
+ # with gr.Blocks() as demo:
238
+ # gr.Markdown("""# LinguaPolis: Bridging Languages, Uniting Teams Globally - Multilingual Conference Call Simulation
239
+ # ## Record your video or upload your video and press the corresponding Submit button at the bottom""")
240
+
241
+ # video_inputs = []
242
+ # language_dropdowns = []
243
+ # transcript_outputs = []
244
+ # translated_texts = []
245
+ # dubbed_videos = []
246
+
247
+ # clear_button = gr.Button("Clear All")
248
+
249
+ # # Create a row for each participant
250
+ # for i in range(n_participants):
251
+ # video_input, language_dropdown, transcript_output, translated_text, dubbed_video = create_participant_row(i, language_choices)
252
+ # video_inputs.append(video_input)
253
+ # language_dropdowns.append(language_dropdown)
254
+ # transcript_outputs.append(transcript_output)
255
+ # translated_texts.append(translated_text)
256
+ # dubbed_videos.append(dubbed_video)
257
+
258
+ # # Create dynamic processing buttons for each participant
259
+ # for i in range(n_participants):
260
+ # gr.Button(f"Submit Speaker {i+1}'s Speech").click(
261
+ # process_speaker,
262
+ # [video_inputs[i], gr.State(i), gr.State(n_participants)] + [language_dropdowns[j] for j in range(n_participants)],
263
+ # [transcript_outputs[i]] + [k for j in zip(translated_texts[:i]+translated_texts[i+1:], dubbed_videos[:i]+dubbed_videos[i+1:]) for k in j]
264
+ # )
265
+ # minutes = gr.Textbox(label="Minutes of Meeting")
266
+ # gr.Button(f"Generate Minutes of meeting").click(summarize, None, minutes)
267
+
268
+ # # Clear button to reset inputs and outputs
269
+ # clear_button.click(clear_all, None, [*video_inputs, *transcript_outputs, *translated_texts, *dubbed_videos, minutes])
270
+
271
+ # # Launch with .queue() to keep it running properly in Jupyter
272
+ # demo.queue().launch(debug=True, share=True)
273
+
274
+
275
+ # create_gradio_interface(n_participants, language_choices)
276
+
277
  def create_dub_from_file(
278
  input_file_path: str,
279
  file_format: str,
 
313
  return None
314
 
315
 
316
+ # Modify the summarize function to accept and return meeting_texts
317
+ def summarize(meeting_texts):
318
  meeting_texts = ', '.join([f"{k}: {v}" for i in meeting_texts for k, v in i.items()])
319
  meeting_date_time = str(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
320
+ # meeting_texts_str = meeting_date_time + '\n' + mt
321
 
322
  minutes_of_meeting = ""
323
  for chunk in AI71(AI71_API_KEY.strip()).chat.completions.create(
324
  model="tiiuae/falcon-180b-chat",
325
  messages=[
326
+ {"role": "system", "content": f"""You are an experienced Secretary who can summarize meeting discussions into minutes of meeting.
327
  Summarize the meetings discussions provided as Speakerwise conversation.
328
+ Strictly consider only the context given in user content for summarization.
329
+ Ensure to mention the title as 'Minutes of Meeting held on {meeting_date_time}' and present the summary with better viewing format and title in bold letters."""},
330
  {"role": "user", "content": meeting_texts},
331
  ],
332
  stream=True,
 
334
  if chunk.choices[0].delta.content:
335
  summary = chunk.choices[0].delta.content
336
  minutes_of_meeting += summary
337
+
338
  minutes_of_meeting = minutes_of_meeting.replace('User:', '').strip()
 
339
  print("minutes_of_meeting:", minutes_of_meeting)
340
  return minutes_of_meeting
341
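
With the new signature, summarize takes the accumulated meeting_texts list (the same per-speaker dicts that process_speaker appends) instead of reading a global. A hypothetical usage sketch, with invented transcript text and assuming AI71_API_KEY is configured as elsewhere in app.py:

# Illustrative only; the speaker entries below are made-up examples.
example_meeting_texts = [
    {"Speaker_1": "We agreed to ship the localization update next sprint."},
    {"Speaker_2": "I will prepare the dubbing test plan by Friday."},
]
minutes = summarize(example_meeting_texts)  # returns the formatted minutes as a string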
 
 
372
  target_language = target_language)
373
  return dub_video
374
 
375
+
376
+ # Update process_speaker function to accept and return meeting_texts
377
+ def process_speaker(video, speaker_idx, n_participants, meeting_texts, *language_list):
378
  transcript = speech_to_text(video)
379
 
380
  # Create outputs for each participant
381
  outputs = []
382
+
383
  def process_translation_dubbing(i):
384
  if i != speaker_idx:
385
  participant_language = language_codes[language_list[i]]
 
399
  else:
400
  outputs.append(translated_text)
401
  outputs.append(dubbed_video)
402
+
403
  if speaker_idx == 0:
404
+ meeting_texts.append({f"Speaker_{speaker_idx+1}": outputs[0]})
405
  else:
406
+ meeting_texts.append({f"Speaker_{speaker_idx+1}": outputs[1]})
407
 
408
+ print("meeting_texts:", meeting_texts)
409
+ print('outputs:', outputs)
410
+ outputs.append(meeting_texts)
411
  return outputs
412
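
Note that the click wiring further down relies on the ordering of this outputs list: the speaker's own transcript first, then a translated-text/dubbed-video pair for each other participant, and finally the updated meeting_texts so it maps back onto the gr.State output. An illustrative sketch of that contract for a hypothetical three-participant call with speaker_idx == 0:

# Illustrative only (not part of the commit):
# outputs == [transcript_p1,
#             translated_for_p2, dubbed_video_for_p2,
#             translated_for_p3, dubbed_video_for_p3,
#             meeting_texts]
# which Gradio maps onto:
# [transcript_outputs[0], translated_texts[1], dubbed_videos[1],
#  translated_texts[2], dubbed_videos[2], meeting_texts]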
 
413
+ # Modify the Gradio interface to manage the meeting_texts between function calls
414
  def create_gradio_interface(n_participants, language_choices):
415
  with gr.Blocks() as demo:
416
  gr.Markdown("""# LinguaPolis: Bridging Languages, Uniting Teams Globally - Multilingual Conference Call Simulation
 
423
  dubbed_videos = []
424
 
425
  clear_button = gr.Button("Clear All")
426
+ meeting_texts = gr.State([]) # Initialize meeting_texts as a Gradio State
427
 
428
  # Create a row for each participant
429
  for i in range(n_participants):
 
438
  for i in range(n_participants):
439
  gr.Button(f"Submit Speaker {i+1}'s Speech").click(
440
  process_speaker,
441
+ [video_inputs[i], gr.State(i), gr.State(n_participants), meeting_texts] + [language_dropdowns[j] for j in range(n_participants)],
442
+ [transcript_outputs[i]] + [k for j in zip(translated_texts[:i]+translated_texts[i+1:], dubbed_videos[:i]+dubbed_videos[i+1:]) for k in j] + [meeting_texts]
443
+ )
444
+
445
  minutes = gr.Textbox(label="Minutes of Meeting")
446
+ gr.Button(f"Generate Minutes of meeting").click(summarize, [meeting_texts], minutes)
447
 
448
  # Clear button to reset inputs and outputs
449
+ clear_button.click(clear_all, None, [*video_inputs, *transcript_outputs, *translated_texts, *dubbed_videos, minutes, meeting_texts])
450
 
451
+ demo.launch(debug=True, share=True)
452
+ create_gradio_interface(4, language_choices)
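
For context, the core pattern this commit adopts is threading a gr.State value through each click handler's inputs and outputs instead of mutating a module-level global. A minimal, self-contained sketch of that round trip, using hypothetical handler and component names (not the ones in app.py):

import gradio as gr

def record_turn(new_text, turns):
    # Append this turn to the session state and return the updated list
    # so Gradio writes it back into the gr.State component.
    turns = turns + [new_text]
    return "\n".join(turns), turns

with gr.Blocks() as demo:
    turns_state = gr.State([])  # per-session list, analogous to meeting_texts
    text_in = gr.Textbox(label="Say something")
    log_out = gr.Textbox(label="Conversation so far")
    gr.Button("Submit").click(record_turn, [text_in, turns_state], [log_out, turns_state])

demo.launch()

The handler receives the current state value as an ordinary argument and must return the new value in the position mapped to the gr.State output, which is how process_speaker now appends to and returns meeting_texts.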