BALAKA committed on
Commit 94d8454 · 1 Parent(s): 9223a38
Files changed (1)
  1. app.py +341 -160
app.py CHANGED
@@ -1,172 +1,353 @@
  import gradio as gr
  import numpy as np
  import librosa
  import requests
  import torch
  import torchaudio
  import math
  import os
- import soundfile as sf
  from glob import glob
  from pytube import YouTube
- from transformers import (
-     Wav2Vec2CTCTokenizer,
-     Wav2Vec2FeatureExtractor,
-     Wav2Vec2Processor,
-     Wav2Vec2ForCTC,
-     TrainingArguments,
-     Trainer,
-     pipeline
- )
- processor = Wav2Vec2Processor.from_pretrained(
-     "airesearch/wav2vec2-large-xlsr-53-th")
- model = Wav2Vec2ForCTC.from_pretrained(
-     "BALAKA/wav2vec2-large-xlsr-53-th-swear-words")
-
- demo = gr.Blocks()
-
-
- def check(sentence):
-     found = []
- negative = ["กระดอ", "กระทิง", "กระสัน", "กระหรี่", "กรีด", "กวนส้นตีน", "กะหรี่", "กินขี้ปี้เยี่ยว", "ขายตัว", "ขี้", "ขโมย", "ข่มขืน", "ควย", "ควาย", "คอขาด", "ฆ่า", "ค่า", "จังไร", "จัญไร", "ฉิบหาย", "ฉี่", "ชั่ว", "ชาติหมา", "ชิงหมาเกิด", "ชิบหาย", "ช้างเย็ด", "ดาก", "ตอแหล", "ตัดหัว", "ตัดหำ", "ตาย", "ตีกัน", "ทรมาน", "ทาส", "ทุเรศ", "นรก", "บีบคอ", "ปากหมา", "ปี้กัน", "พ่อง", "พ่อมึง", "ฟัก", "ฟาย", "ยัดแม่", "ยิงกัน", "ระยำ", "ดอกทอง", "โสเภณี", "ล่อกัน", "ศพ", "สถุล", "สทุน", "สัด", "สันดาน", "สัส", "สาด", "ส้นตีน", "หน้าตัวเมืย", "ส้นตีน", "หมอย", "หรรม", "หัวแตก", "หำ", "หน้าหี", "น่าหี", "อนาจาร", "อัปปรี", "อีช้าง", "อีปลาวาฬ", "อีสัด", "อีหน้าหี", "อีหมา", "ห่า", "อับปรี", "เฆี่ยน", "เงี่ยน", "เจี๊ยว", "เชี่ย", "เด้า", "เผด็จการ", "เยี่ยว", "เย็ด", "เลือด", "เสือก", "เหล้า", "เหี้ย", "เอากัน", "แดก", "แตด", "แทง", "แม่ง", "แม่มึง", "แรด", "โคตร", "โง่", "โป๊", "โรคจิต", "ใจหมา", "ไอเข้", "ไอ้ขึ้หมา", "ไอ้บ้า", "ไอ้หมา", "เวร", "เวน", "ไอ้มืด", "ไอ้ดำ", "นิกก้า", "คนดำ", "นิโก", "บิช", "ดาก", "ปืน", "กระสุน", "โลลิ", ]
-     negative = list(dict.fromkeys(negative))
-     for i in negative:
-         if sentence.find(i) != -1:
-             found.append(i)
-     return found
-
-
- def resample(file_path):
-     speech_array, sampling_rate = torchaudio.load(file_path)
-     resampler = torchaudio.transforms.Resample(sampling_rate, 16000)
-     return resampler(speech_array)[0].numpy()
-
-
- def tran_script(file_path):
-     if isinstance(file_path, str):
-         speech = resample(file_path)
-         inputs = processor(speech, sampling_rate=16_000,
-                            return_tensors="pt", padding=True)
-         logits = model(inputs.input_values).logits
          predicted_ids = torch.argmax(logits, dim=-1)
-         predicted_sentence = processor.batch_decode(predicted_ids)
-         return predicted_sentence
-     else:
-         now_path = glob('/home/user/app/split_*.mp3')
-         sentence = []
-         for i in range(file_path - 1):
-             now_path = f'/home/user/app/split_{i+1}.mp3'
-             speech = resample(now_path)
-             inputs = processor(speech, sampling_rate=16_000,
-                                return_tensors="pt", padding=True)
-             logits = model(inputs.input_values).logits
-             predicted_ids = torch.argmax(logits, dim=-1)
-             predicted_sentence = processor.batch_decode(predicted_ids)
-             sentence.append(predicted_sentence)
-         return sentence
-
-
- def split_file(file_path):
-     speech, sample_rate = librosa.load(file_path)
-     buffer = 5 * sample_rate
-     samples_total = len(speech)
-     samples_wrote = 0
-     counter = 1
-
-     while samples_wrote < samples_total:
-
-         if buffer > (samples_total - samples_wrote):
-             buffer = samples_total - samples_wrote
-
-         block = speech[samples_wrote: (samples_wrote + buffer)]
-         out_filename = "split_" + str(counter) + ".mp3"
-
-         sf.write(out_filename, block, sample_rate)
-         counter += 1
-         samples_wrote += buffer
-     return counter
-
-
- def process(file_path):
-     if librosa.get_duration(filename=file_path) <= 5:
-         sentence = tran_script(file_path)
-         sentence = str(sentence).replace(' ', '').strip("[]")
-         return 'found at 0.00m 0.00m 0.00-0.05 seconds found ' + str(check(sentence))
-     counter = split_file(file_path)
-     sentence = tran_script(counter)
-     result = ''
-     for index, item in enumerate(sentence):
-         now_sentence = item[0]
-         now_sentence = str(item).replace(' ', '').strip("[]grt")
-         now_sentence = check(now_sentence)
-         if now_sentence:
-             time = (index)*5
-             minutes = math.floor(time / 60)
-             hours = math.floor(minutes/60)
-             seconds = time % 60
-             minutes = str(minutes).zfill(2)
-             hours = str(hours).zfill(2)
-             fist_seconds = str(seconds).zfill(2)
-             last_seconds = str(seconds+5).zfill(2)
-             text = f'found at {hours}h {minutes}m {fist_seconds}-{last_seconds}seconds found {now_sentence}'
-             result += text + '\n'
-     return result
-
-
- def youtube_loader(link):
-     yt = YouTube(str(link))
-     video = yt.streams.filter(only_audio=True).first()
-     out_file = video.download(output_path='mp3')
-     os.rename(out_file, '/home/user/app/mp3/youtube.mp3')
-     return process('/home/user/app/mp3/youtube.mp3')
-
-
- def twitch_loader(link):
-     os.system(f"twitch-dl download -q audio_only {link} --output twitch.wav")
-     return process('/home/user/app/twitch.wav')
-
-
- with demo:
-     gr.Markdown("Select your input type.")
      with gr.Tabs():
-         with gr.TabItem("From your voice."):
-             with gr.Row():
-                 voice = gr.Audio(source="microphone", type="filepath",
-                                  optional=True, labe="Start record your voice here.")
-                 voice_output = gr.Textbox(labe="Your output is here.")
-             text_button1 = gr.Button("Submit")
-         with gr.TabItem("From your file."):
-             with gr.Row():
-                 file_input = gr.Audio(
-                     type="filepath", optional=True, labe="Drop your audio file here.")
-                 file_output = gr.Textbox(labe="Your output is here.")
-             text_button4 = gr.Button("Submit")
-             gr.Examples([["ex/ex1.mp3"], ["ex/ex2.mp3"]],
-                         inputs=file_input, outputs=file_output, fn=process)
-         with gr.TabItem("From youtube"):
-             with gr.Row():
-                 youtube_input = gr.Textbox(
-                     label="Insert your youtube link here.", placeholder='https://www.youtube.com/watch?v=dQw4w9WgXcQ')
-                 youtube_output = gr.Textbox(labe="Your output is here.")
-             text_button2 = gr.Button("Submit")
-             gr.Examples([["https://youtu.be/JwOJWFniWS8"], ["https://youtu.be/B8TvZyoucxM"]],
-                         inputs=youtube_input, outputs=youtube_output, fn=youtube_loader)
-         with gr.TabItem("From twitch"):
-             with gr.Row():
-                 twitch_input = gr.Textbox(label="Insert your twitch link or ID here.",
-                                           placeholder='https://www.twitch.tv/videos/1823056925 or 1823056925')
-                 twitch_output = gr.Textbox(labe="Your output is here.")
-             text_button3 = gr.Button("Submit")
-             gr.Examples([["https://www.twitch.tv/videos/1823056925"], ["https://www.twitch.tv/videos/1827185416"]],
-                         inputs=twitch_input, outputs=twitch_output, fn=twitch_loader)
-
-     text_button1.click(process, inputs=voice, outputs=voice_output)
-     text_button2.click(youtube_loader, inputs=youtube_input,
-                        outputs=youtube_output)
-     text_button3.click(twitch_loader, inputs=twitch_input,
-                        outputs=twitch_output)
-     text_button4.click(process, inputs=file_input,
-                        outputs=file_output)
-
-
- demo.launch(enable_queue=True)

  import gradio as gr
  import numpy as np
  import librosa
+ import soundfile as sf
  import requests
  import torch
  import torchaudio
  import math
  import os
+ import shutil # For moving files
  from glob import glob
  from pytube import YouTube
+ import tempfile # For temporary files and directories
+ import subprocess # For calling external commands like twitch-dl
+ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor # Assuming Wav2Vec2
+
+ # --- Constants ---
+ NEGATIVE_WORDS = set([
+ "กระดอ", "กระทิง", "กระสัน", "กระหรี่", "กรีด", "กวนส้นตีน", "กะหรี่", "กินขี้ปี้เยี่ยว", "ขายตัว", "ขี้", "ขโมย", "ข่มขืน",
+ "ควย", "ควาย", "คอขาด", "ฆ่า", "จังไร", "จัญไร", "ฉิบหาย", "ฉี่", "ชั่ว", "ชาติหมา", "ชิงหมาเกิด", "ชิบหาย", "ช้างเย็ด",
+ "ดาก", "ตอแหล", "ตัดหัว", "ตัดหำ", "ตาย", "ตีกัน", "ทรมาน", "ทาส", "ทุเรศ", "นรก", "บีบคอ", "ปากหมา", "ปี้กัน", "พ่อง",
+ "พ่อมึง", "ฟักยู", "ฟาย", "ยัดแม่", "ยิงกัน", "ระยำ", "ดอกทอง", "โสเภณี", "ล่อกัน", "ศพ", "สถุล", "สทุน", "สัด", "สันดาน",
+ "สัส", "สาด", "ส้นตีน", "หน้าตัวเมืย", "หมอย", "หรรม", "หัวแตก", "หำ", "หน้าหี", "น่าหี", "อนาจาร", "อัปปรี", "อีช้าง",
+ "อีปลาวาฬ", "อีสัด", "อีหน้าหี", "อีหมา", "ห่า", "อับปรี", "เฆี่ยน", "เงี่ยน", "เจี๊ยว", "เชี่ย", "เด้า", "เผด็จการ",
+ "เยี่ยว", "เย็ด", "เลือด", "เสือก", "เหล้า", "เหี้ย", "เอากัน", "แดก", "แตด", "แทง", "แม่ง", "แม่มึง", "แรด", "โคตร",
+ "โง่", "โป๊", "โรคจิต", "ใจหมา", "ไอเข้", "ไอ้ขึ้หมา", "ไอ้บ้า", "ไอ้หมา", "เวร", "เวน"
+ ])
+ CHUNK_DURATION_S = 5
+ TARGET_SAMPLE_RATE = 16000
+ MODEL_NAME = "airesearch/wav2vec2-large-xlsr-53-th"
+ EXAMPLE_AUDIO_DIR = "audio_examples" # Directory for example audio files
+
+ # --- Global Model and Processor ---
+ try:
+     print(f"Loading model: {MODEL_NAME}...")
+     PROCESSOR = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
+     MODEL = Wav2Vec2ForCTC.from_pretrained(MODEL_NAME)
+     MODEL.eval()
+     if torch.cuda.is_available():
+         MODEL.to("cuda")
+     print("Model loaded successfully.")
+ except Exception as e:
+     print(f"Error loading model: {e}")
+     PROCESSOR = None
+     MODEL = None
+
+ # --- Helper Functions (check_profanity, resample_audio, transcribe_chunk, split_audio_file, format_time) ---
+
+ def check_profanity(sentence_text):
+     found_words = []
+     for profanity in NEGATIVE_WORDS:
+         if profanity in sentence_text:
+             found_words.append(profanity)
+     return found_words
+
+ def resample_audio(file_path, target_sr=TARGET_SAMPLE_RATE):
+     try:
+         speech_array, sampling_rate = torchaudio.load(file_path)
+         if sampling_rate != target_sr:
+             resampler = torchaudio.transforms.Resample(sampling_rate, target_sr)
+             speech_array = resampler(speech_array)
+         return speech_array[0].numpy()
+     except Exception as e:
+         print(f"Error resampling {file_path}: {e}")
+         return None
+
+ def transcribe_chunk(audio_np_array, sample_rate=TARGET_SAMPLE_RATE):
+     if MODEL is None or PROCESSOR is None:
+         return "[Model not loaded]"
+     try:
+         inputs = PROCESSOR(audio_np_array, sampling_rate=sample_rate, return_tensors="pt", padding=True)
+         input_values = inputs.input_values
+         if torch.cuda.is_available():
+             input_values = input_values.to("cuda")
+         with torch.no_grad():
+             logits = MODEL(input_values).logits
          predicted_ids = torch.argmax(logits, dim=-1)
+         transcription = PROCESSOR.batch_decode(predicted_ids)
+         return transcription[0] if transcription else ""
+     except Exception as e:
+         print(f"Error during transcription: {e}")
+         return "[Transcription Error]"
+
+ def split_audio_file(file_path, chunk_duration_s=CHUNK_DURATION_S, output_dir=None):
+     try:
+         speech, sample_rate = librosa.load(file_path, sr=None)
+         chunk_length_samples = int(chunk_duration_s * sample_rate) # Ensure int
+         samples_total = len(speech)
+         samples_wrote = 0
+         counter = 1
+         output_files = []
+
+         if output_dir is None:
+             print("Warning: output_dir not provided to split_audio_file. Saving to current dir.")
+             output_dir = "."
+
+         while samples_wrote < samples_total:
+             segment_end = samples_wrote + chunk_length_samples
+             block = speech[samples_wrote : min(segment_end, samples_total)]
+             out_filename = os.path.join(output_dir, f"split_{counter}.wav")
+             sf.write(out_filename, block, sample_rate)
+             output_files.append(out_filename)
+             counter += 1
+             samples_wrote += chunk_length_samples
+         return output_files
+     except Exception as e:
+         print(f"Error splitting file {file_path}: {e}")
+         return []
+
+ def format_time(seconds_total):
+     hours = math.floor(seconds_total / 3600)
+     minutes = math.floor((seconds_total % 3600) / 60)
+     seconds_start = math.floor(seconds_total % 60)
+     seconds_end = seconds_start + CHUNK_DURATION_S
+     return f"{hours:02d}h {minutes:02d}m {seconds_start:02d}-{seconds_end:02d}s"
+
+ # --- Main Processing Logic ---
+ def process_audio_file(audio_file_path):
+     if not audio_file_path or not os.path.exists(audio_file_path):
+         return "Error: Audio file not found or path is invalid."
+     if MODEL is None or PROCESSOR is None:
+         return "Error: Transcription model not loaded. Cannot process audio."
+
+     results_text = ""
+     try:
+         duration = librosa.get_duration(path=audio_file_path) # Use path for newer librosa
+
+         if duration <= CHUNK_DURATION_S:
+             resampled_audio = resample_audio(audio_file_path)
+             if resampled_audio is None:
+                 return "Error: Could not resample audio."
+             transcription = transcribe_chunk(resampled_audio)
+             cleaned_transcription = transcription.replace(' ', '')
+             found_profanities = check_profanity(cleaned_transcription)
+             if found_profanities:
+                 time_str = f"00h 00m 00-{math.ceil(duration):02d}s"
+                 results_text = f"Found in short audio ({time_str}): {', '.join(found_profanities)}\n(Full: '{transcription}')"
+             else:
+                 results_text = f"No profanity found in short audio.\n(Full: '{transcription}')"
+         else:
+             with tempfile.TemporaryDirectory() as temp_dir:
+                 split_files = split_audio_file(audio_file_path, CHUNK_DURATION_S, output_dir=temp_dir)
+                 if not split_files:
+                     return "Error: Failed to split audio file."
+                 all_transcriptions_info = []
+                 profanity_found_overall = False
+                 for i, chunk_file_path in enumerate(split_files):
+                     resampled_audio = resample_audio(chunk_file_path)
+                     if resampled_audio is None:
+                         print(f"Warning: Could not resample chunk {chunk_file_path}, skipping.")
+                         all_transcriptions_info.append(("[Resample Error]", []))
+                         continue
+                     transcription = transcribe_chunk(resampled_audio)
+                     cleaned_transcription = transcription.replace(' ', '')
+                     found_profanities = check_profanity(cleaned_transcription)
+                     all_transcriptions_info.append((transcription, found_profanities))
+                     if found_profanities:
+                         profanity_found_overall = True
+                         start_time_s = i * CHUNK_DURATION_S
+                         time_str = format_time(start_time_s)
+                         results_text += f"Found at {time_str}: {', '.join(found_profanities)}\n(Segment: '{transcription}')\n---\n"
+                 if not profanity_found_overall:
+                     results_text = "No profanity found in any segment.\n"
+
+                 full_text_segments = [t[0] for t in all_transcriptions_info if t[0] not in ["[Resample Error]", "[Transcription Error]"]]
+                 if full_text_segments:
+                     results_text += f"\nFull approximate transcription:\n{' '.join(full_text_segments)}"
+                 elif not profanity_found_overall: # if no profanity and no successful transcription
+                     results_text = "No profanity found and could not generate full transcription."
+
+         return results_text.strip() if results_text else "Processing complete. No specific findings or transcription available."
+
+     except Exception as e:
+         print(f"Error processing audio file {audio_file_path}: {e}")
+         return f"An unexpected error occurred: {e}"
+
+ # --- Gradio Interface Callbacks ---
+
+ def youtube_loader_and_process(youtube_link):
+     if not youtube_link:
+         return "Please provide a YouTube link.", None
+     downloaded_file_path = None # Initialize
+     try:
+         print(f"Downloading YouTube video: {youtube_link}")
+         yt = YouTube(str(youtube_link))
+         video_stream = yt.streams.filter(only_audio=True).first()
+         if not video_stream:
+             return "No audio stream found for this YouTube video.", None
+         with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_audio_file:
+             video_stream.download(filename=tmp_audio_file.name)
+             downloaded_file_path = tmp_audio_file.name
+         print(f"Downloaded YouTube audio to: {downloaded_file_path}")
+         results = process_audio_file(downloaded_file_path)
+         return results, downloaded_file_path
+     except Exception as e:
+         print(f"Error downloading or processing YouTube link: {e}")
+         # If download failed before path was set, downloaded_file_path might still be None
+         return f"Error: {e}", downloaded_file_path if downloaded_file_path and os.path.exists(downloaded_file_path) else None
+     # No explicit finally: os.remove here, Gradio Audio component needs the file.
+     # Consider a cleanup strategy for long-running servers.
+
+ def twitch_loader_and_process(twitch_link_or_id):
+     if not twitch_link_or_id:
+         return "Please provide a Twitch link or VOD ID.", None
+     final_audio_path_for_gradio = None # Initialize
+     try:
+         print(f"Downloading Twitch VOD: {twitch_link_or_id}")
+         with tempfile.TemporaryDirectory() as temp_dir:
+             base_name = os.path.join(temp_dir, "twitch_audio")
+             # Try to make twitch-dl use a common audio/video suffix, though it might choose its own
+             command = ["twitch-dl", "download", "-q", "audio_only", twitch_link_or_id, "--output", base_name + ".%(format)s"]
+             print(f"Executing: {' '.join(command)}")
+             process_result = subprocess.run(command, capture_output=True, text=True, check=False)
+
+             if process_result.returncode != 0:
+                 print(f"twitch-dl error: {process_result.stderr}")
+                 return f"Error downloading Twitch VOD: {process_result.stderr}", None
+
+             downloaded_files = glob(os.path.join(temp_dir, "twitch_audio.*"))
+             if not downloaded_files:
+                 # Fallback if filename pattern didn't work as expected
+                 # twitch-dl might also create VODID.mkv or similar
+                 # For robustness, search for any media file if the specific pattern fails
+                 all_media_in_temp = [f for f_ext in ('.mkv', '.mp4', '.ts', '.aac', '.wav', '.mp3')
+                                      for f in glob(os.path.join(temp_dir, f"*{f_ext}"))]
+                 if all_media_in_temp:
+                     downloaded_files = all_media_in_temp
+
+             if not downloaded_files:
+                 print(f"Twitch download completed, but output file not found in {temp_dir}. Check twitch-dl output naming.")
+                 print(f"stdout: {process_result.stdout}")
+                 print(f"stderr: {process_result.stderr}")
+                 return "Twitch download completed, but output file not found.", None
+
+             downloaded_file_path = downloaded_files[0]
+             print(f"Downloaded Twitch audio to: {downloaded_file_path}")
+             results = process_audio_file(downloaded_file_path)
+             if os.path.exists(downloaded_file_path):
+                 # Copy to a new temp file that Gradio can use and that persists beyond this function
+                 with tempfile.NamedTemporaryFile(suffix=os.path.splitext(downloaded_file_path)[1], delete=False) as persistent_tmp_file:
+                     shutil.copy2(downloaded_file_path, persistent_tmp_file.name)
+                     final_audio_path_for_gradio = persistent_tmp_file.name
+             return results, final_audio_path_for_gradio
+     except FileNotFoundError:
+         return "Error: `twitch-dl` command not found. Please ensure it's installed and in your PATH.", None
+     except subprocess.CalledProcessError as e: # Should be caught by check=False and returncode !=0
+         print(f"Twitch-dl execution failed: {e.stderr if e.stderr else e.stdout}")
+         return f"Error executing twitch-dl: {e.stderr if e.stderr else e.stdout}", None
+     except Exception as e:
+         print(f"Error processing Twitch link: {e}")
+         return f"An unexpected error occurred: {e}", None
+
+
+ # --- Gradio UI Definition ---
+ with gr.Blocks(theme=gr.themes.Soft()) as demo: # Added a soft theme
+     gr.Markdown("# Audio Content Analyzer")
+     gr.Markdown("Transcribes audio and checks for specific words. Processes audio in 5-second chunks.")
+     if MODEL is None or PROCESSOR is None:
+         gr.Warning("Transcription model failed to load. Transcription features will not work.")
+
      with gr.Tabs():
+         with gr.TabItem("From your voice (Microphone)"):
+             with gr.Column():
+                 voice_input = gr.Audio(sources=["microphone"], type="filepath", label="Record or Upload Microphone Audio")
+                 voice_output_text = gr.Textbox(label="Analysis Results", lines=10, interactive=False)
+                 submit_voice_button = gr.Button("Submit Microphone Audio")
+
+         with gr.TabItem("From an Audio File"):
+             with gr.Column():
+                 file_input = gr.Audio(sources=["upload"], type="filepath", label="Upload Audio File (.wav, .mp3, etc.)")
+
+                 # --- MODIFIED SECTION: ADD EXAMPLES ---
+                 # Ensure the 'audio_examples' directory exists and has audio files.
+                 # Example: Create 'audio_examples/sample1.wav', 'audio_examples/another_sample.mp3'
+                 if not os.path.exists(EXAMPLE_AUDIO_DIR):
+                     gr.Markdown(f"_(Optional: Create a directory named '{EXAMPLE_AUDIO_DIR}' and add audio files to it for quick examples.)_")
+                 else:
+                     example_files_list = []
+                     for ext in ("*.wav", "*.mp3", "*.flac", "*.m4a", "*.ogg"): # Common audio extensions
+                         example_files_list.extend(glob(os.path.join(EXAMPLE_AUDIO_DIR, ext)))
+
+                     if example_files_list:
+                         gr.Examples(
+                             examples=sorted(example_files_list), # Sort for consistent order
+                             inputs=file_input, # Clicking an example populates this input
+                             label="Or select an example audio file:",
+                             # examples_per_page=5 # Optional: if you have many examples
+                         )
+                     else:
+                         gr.Markdown(f"_(No example audio files found in '{EXAMPLE_AUDIO_DIR}'. Add some .wav, .mp3, etc. files!)_")
+                 # --- END MODIFIED SECTION ---
+
+                 file_output_text = gr.Textbox(label="Analysis Results", lines=10, interactive=False)
+                 submit_file_button = gr.Button("Submit Audio File")
+
+         with gr.TabItem("From YouTube Link"):
+             with gr.Column():
+                 youtube_input_link = gr.Textbox(label="YouTube Video Link", placeholder="e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ")
+                 youtube_output_audio = gr.Audio(label="Downloaded Audio", type="filepath", interactive=False)
+                 youtube_output_text = gr.Textbox(label="Analysis Results", lines=10, interactive=False)
+                 submit_youtube_button = gr.Button("Fetch and Analyze YouTube Audio")
+
+         with gr.TabItem("From Twitch VOD"):
+             with gr.Column():
+                 twitch_input_link = gr.Textbox(label="Twitch VOD Link or ID", placeholder="e.g., https://www.twitch.tv/videos/123456789 or 123456789")
+                 twitch_output_audio = gr.Audio(label="Downloaded Audio", type="filepath", interactive=False)
+                 twitch_output_text = gr.Textbox(label="Analysis Results", lines=10, interactive=False)
+                 submit_twitch_button = gr.Button("Fetch and Analyze Twitch VOD")
+
+     # --- Button Click Handlers ---
+     submit_voice_button.click(
+         fn=process_audio_file,
+         inputs=[voice_input],
+         outputs=[voice_output_text],
+         api_name="analyze_microphone_audio" # Add API name for programmatic access
+     )
+     submit_file_button.click(
+         fn=process_audio_file,
+         inputs=[file_input],
+         outputs=[file_output_text],
+         api_name="analyze_uploaded_audio"
+     )
+     submit_youtube_button.click(
+         fn=youtube_loader_and_process,
+         inputs=[youtube_input_link],
+         outputs=[youtube_output_text, youtube_output_audio],
+         api_name="analyze_youtube_audio"
+     )
+     submit_twitch_button.click(
+         fn=twitch_loader_and_process,
+         inputs=[twitch_input_link],
+         outputs=[twitch_output_text, twitch_output_audio],
+         api_name="analyze_twitch_audio"
+     )
+
+ if __name__ == "__main__":
+     # Create the example audio directory if it doesn't exist, for user convenience
+     if not os.path.exists(EXAMPLE_AUDIO_DIR):
+         try:
+             os.makedirs(EXAMPLE_AUDIO_DIR)
+             print(f"Created directory: {EXAMPLE_AUDIO_DIR}. Please add some audio files to it for examples.")
+         except OSError as e:
+             print(f"Could not create directory {EXAMPLE_AUDIO_DIR}: {e}")
+     else:
+         print(f"Example audio directory '{EXAMPLE_AUDIO_DIR}' already exists. Add audio files there if you haven't.")
+
+     demo.launch(share=True, debug=True)
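
Because each button handler now registers an api_name, the analysis endpoints can also be called programmatically once the app is running. The snippet below is a minimal sketch using gradio_client, not part of the commit: the local URL and the sample file path are placeholders, and on older gradio_client releases the audio argument is passed as a plain filepath string instead of being wrapped in handle_file().

from gradio_client import Client, handle_file  # handle_file is available on recent gradio_client releases

client = Client("http://127.0.0.1:7860")  # placeholder URL: wherever demo.launch() is serving the app

# Single output: the analysis text for an uploaded audio file.
report = client.predict(handle_file("sample.wav"),  # placeholder path to a local audio file
                        api_name="/analyze_uploaded_audio")
print(report)

# Two outputs: the analysis text plus the path of the downloaded audio.
text, audio_path = client.predict("https://youtu.be/JwOJWFniWS8",
                                  api_name="/analyze_youtube_audio")
print(text)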