lelafav502 committed on
Commit
fee32f6
·
verified ·
1 Parent(s): a82912e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -15
app.py CHANGED
@@ -1,28 +1,76 @@
1
- import gradio as gr
2
- from faster_whisper import WhisperModel
3
  import json
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
- def transcribe_audio(audiofile):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  model_size = "medium"
7
  model = WhisperModel(model_size)
8
 
9
- segments, info = model.transcribe(audiofile, word_timestamps=True)
10
  segments = list(segments) # The transcription will actually run here.
11
-
12
  wordlevel_info = []
 
13
  for segment in segments:
14
  for word in segment.words:
15
- wordlevel_info.append({'word':word.word,'start':word.start,'end':word.end})
 
 
 
16
 
17
- # Save wordlevel_info to data.json
18
- with open('data.json', 'w') as f:
19
- json.dump(wordlevel_info, f)
20
 
21
- return "Transcription complete. Check data.json for results."
 
22
 
23
- # Define the Gradio interface
24
- audiofile_input = gr.inputs.Audio(label="Upload your audio file")
25
- output_text = gr.outputs.Textbox(label="Transcription")
26
 
27
- # Create Gradio interface
28
- gr.Interface(transcribe_audio, inputs=audiofile_input, outputs=output_text).launch()
 
 
 
1
  import json
2
+ import gradio as gr
3
+ from faster_whisper import WhisperModel # Assuming you have installed this library
4
+
5
def split_text_into_lines(data):
    """Group word-level timestamps into subtitle lines.

    A line is flushed as soon as, after appending the current word, any of
    these holds: accumulated spoken duration exceeds 2.5 s, the joined text
    exceeds 30 characters, or the current word starts more than 1.5 s after
    the previous word ended (a silence gap). Note the triggering word is
    included in the flushed line, matching the original behavior.

    Each returned dict has: "word" (space-joined text), "start"/"end"
    (timestamps of the line's first/last word) and "textcontents" (the raw
    word dicts that make up the line).
    """
    max_chars = 30
    max_duration = 2.5
    max_gap = 1.5

    subtitle_lines = []
    current = []
    spoken = 0

    for idx, entry in enumerate(data):
        current.append(entry)
        spoken += entry["end"] - entry["start"]

        joined = " ".join(w["word"] for w in current)

        # A gap can only exist from the second word onward.
        gap_exceeded = idx > 0 and (entry["start"] - data[idx - 1]["end"]) > max_gap

        if spoken > max_duration or len(joined) > max_chars or gap_exceeded:
            subtitle_lines.append({
                "word": joined,
                "start": current[0]["start"],
                "end": current[-1]["end"],
                "textcontents": current,
            })
            # Rebind (don't clear) so the stored list stays intact.
            current = []
            spoken = 0

    # Flush whatever remains after the last word.
    if current:
        subtitle_lines.append({
            "word": " ".join(w["word"] for w in current),
            "start": current[0]["start"],
            "end": current[-1]["end"],
            "textcontents": current,
        })

    return subtitle_lines
50
+
51
def transcribe_audio(audiofilename):
    """Transcribe the audio file at *audiofilename* with faster-whisper and
    return line-level subtitles (see split_text_into_lines).

    Fix: the original constructed WhisperModel("medium") on every call,
    re-loading the model weights per request. The model is now created once
    and cached on the function object, which preserves the interface while
    making repeat calls far cheaper.
    """
    model_size = "medium"
    model = getattr(transcribe_audio, "_model", None)
    if model is None:
        model = WhisperModel(model_size)
        transcribe_audio._model = model

    segments, info = model.transcribe(audiofilename, word_timestamps=True)
    segments = list(segments)  # The transcription actually runs here (lazy generator).

    # Flatten to per-word dicts with start/end timestamps.
    wordlevel_info = []
    for segment in segments:
        for word in segment.words:
            wordlevel_info.append({'word': word.word, 'start': word.start, 'end': word.end})

    linelevel_subtitles = split_text_into_lines(wordlevel_info)
    return linelevel_subtitles
65
 
66
def audio_transcription(audiofile):
    """Gradio callback: transcribe the uploaded audio and return subtitles.

    Fix: the original did `audiofile.name` unconditionally, which raises
    AttributeError when Gradio delivers the upload as a plain filepath
    string (the behavior of current Audio components) rather than a
    tempfile-like object. Accept both shapes.
    """
    # File-like uploads expose .name; plain str paths are used as-is.
    filepath = getattr(audiofile, "name", audiofile)
    transcription = transcribe_audio(filepath)
    return transcription
69
 
70
# --- Gradio UI wiring (module level; runs on import) ---
# NOTE(review): gr.inputs / gr.outputs is the legacy pre-3.x component
# namespace and has been removed in current Gradio releases — confirm the
# pinned gradio version on this Space still provides it.
inputs = gr.inputs.Audio(label="Upload Audio File")
outputs = gr.outputs.Json(label="Transcription Output")

title = "Audio Transcription"
description = "Upload an audio file and get the transcription in JSON format."

# Build the web app and start serving; launch() blocks the process.
gr.Interface(fn=audio_transcription, inputs=inputs, outputs=outputs, title=title, description=description).launch()