# Spaces: Sleeping
import gradio as gr | |
import numpy as np | |
import librosa | |
import soundfile as sf | |
import requests | |
import torch | |
import torchaudio | |
import math | |
import os | |
import shutil # For moving files | |
from glob import glob | |
from pytube import YouTube | |
import tempfile # For temporary files and directories | |
import subprocess # For calling external commands like twitch-dl | |
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor # Assuming Wav2Vec2 | |
# --- Constants ---
# Thai words/phrases that get flagged when they occur (as substrings) in a
# transcription. Matching is done by check_profanity().
NEGATIVE_WORDS = {
    "กระดอ", "กระทิง", "กระสัน", "กระหรี่", "กรีด", "กวนส้นตีน", "กะหรี่", "กินขี้ปี้เยี่ยว", "ขายตัว", "ขี้", "ขโมย", "ข่มขืน",
    "ควย", "ควาย", "คอขาด", "ฆ่า", "จังไร", "จัญไร", "ฉิบหาย", "ฉี่", "ชั่ว", "ชาติหมา", "ชิงหมาเกิด", "ชิบหาย", "ช้างเย็ด",
    "ดาก", "ตอแหล", "ตัดหัว", "ตัดหำ", "ตาย", "ตีกัน", "ทรมาน", "ทาส", "ทุเรศ", "นรก", "บีบคอ", "ปากหมา", "ปี้กัน", "พ่อง",
    "พ่อมึง", "ฟักยู", "ฟาย", "ยัดแม่", "ยิงกัน", "ระยำ", "ดอกทอง", "โสเภณี", "ล่อกัน", "ศพ", "สถุล", "สทุน", "สัด", "สันดาน",
    "สัส", "สาด", "ส้นตีน", "หน้าตัวเมืย", "หมอย", "หรรม", "หัวแตก", "หำ", "หน้าหี", "น่าหี", "อนาจาร", "อัปปรี", "อีช้าง",
    "อีปลาวาฬ", "อีสัด", "อีหน้าหี", "อีหมา", "ห่า", "อับปรี", "เฆี่ยน", "เงี่ยน", "เจี๊ยว", "เชี่ย", "เด้า", "เผด็จการ",
    "เยี่ยว", "เย็ด", "เลือด", "เสือก", "เหล้า", "เหี้ย", "เอากัน", "แดก", "แตด", "แทง", "แม่ง", "แม่มึง", "แรด", "โคตร",
    "โง่", "โป๊", "โรคจิต", "ใจหมา", "ไอเข้", "ไอ้ขึ้หมา", "ไอ้บ้า", "ไอ้หมา", "เวร", "เวน",
}

# Length, in seconds, of each segment that is transcribed on its own.
CHUNK_DURATION_S = 5
# Sample rate (Hz) that audio is resampled to before transcription.
TARGET_SAMPLE_RATE = 16000
# Identifier passed to `from_pretrained` for both the processor and the model.
MODEL_NAME = "airesearch/wav2vec2-large-xlsr-53-th"
# Directory scanned for example audio files shown in the UI.
EXAMPLE_AUDIO_DIR = "ex"
# --- Global Model and Processor ---
def _load_speech_model():
    """Load the processor/model pair named by MODEL_NAME.

    Returns:
        A ``(processor, model)`` tuple, or ``(None, None)`` when anything in
        the loading sequence raises. The rest of the module checks for
        ``None`` before transcribing.
    """
    try:
        print(f"Loading model: {MODEL_NAME}...")
        processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
        model = Wav2Vec2ForCTC.from_pretrained(MODEL_NAME)
        model.eval()
        if torch.cuda.is_available():
            model.to("cuda")
        print("Model loaded successfully.")
    except Exception as load_error:
        # A failed load must not prevent the app from starting.
        print(f"Error loading model: {load_error}")
        return None, None
    return processor, model


PROCESSOR, MODEL = _load_speech_model()
# --- Helper Functions (check_profanity, resample_audio, transcribe_chunk, split_audio_file, format_time) ---
# Small audio/text utilities used by the main processing logic further down.
def check_profanity(sentence_text, negative_words=None):
    """Return the flagged words that occur as substrings of *sentence_text*.

    Args:
        sentence_text: Text to scan. ``None`` or an empty string yields ``[]``.
        negative_words: Optional iterable of words to look for. Defaults to
            the module-level ``NEGATIVE_WORDS``.

    Returns:
        A sorted list of the matching words. Sorting keeps the report stable
        between runs; iterating a set of strings directly gives an order that
        can change from one interpreter start to the next.
    """
    if not sentence_text:
        return []
    if negative_words is None:
        negative_words = NEGATIVE_WORDS
    # Skip empty entries: "" is a substring of every string.
    return sorted(word for word in negative_words if word and word in sentence_text)
def resample_audio(file_path, target_sr=TARGET_SAMPLE_RATE):
    """Load an audio file as a mono 1-D NumPy array at *target_sr*.

    Args:
        file_path: Path of the audio file to load with ``torchaudio.load``.
        target_sr: Desired sample rate in Hz.

    Returns:
        A 1-D NumPy array of samples, or ``None`` if loading or resampling
        fails (the error is printed).
    """
    try:
        waveform, source_sr = torchaudio.load(file_path)
        # torchaudio.load yields a (channels, frames) tensor. Average the
        # channels instead of keeping only the first one, so multi-channel
        # files are handled like the chunks written by split_audio_file
        # (librosa.load downmixes to mono by default).
        if waveform.dim() > 1 and waveform.size(0) > 1:
            waveform = waveform.mean(dim=0, keepdim=True)
        if source_sr != target_sr:
            resampler = torchaudio.transforms.Resample(source_sr, target_sr)
            waveform = resampler(waveform)
        return waveform[0].numpy()
    except Exception as e:
        print(f"Error resampling {file_path}: {e}")
        return None
def transcribe_chunk(audio_np_array, sample_rate=TARGET_SAMPLE_RATE):
    """Transcribe one audio array with the globally loaded model.

    Args:
        audio_np_array: Audio samples handed to ``PROCESSOR``.
        sample_rate: Sample rate of *audio_np_array* in Hz.

    Returns:
        The decoded text of the first batch item (``""`` when decoding
        returns nothing), ``"[Model not loaded]"`` when the model or
        processor is unavailable, or ``"[Transcription Error]"`` when any
        step raises.
    """
    model_ready = MODEL is not None and PROCESSOR is not None
    if not model_ready:
        return "[Model not loaded]"
    try:
        features = PROCESSOR(
            audio_np_array,
            sampling_rate=sample_rate,
            return_tensors="pt",
            padding=True,
        )
        model_input = features.input_values
        if torch.cuda.is_available():
            # The model is moved to CUDA at load time when it is available.
            model_input = model_input.to("cuda")
        with torch.no_grad():
            logits = MODEL(model_input).logits
        # Greedy decoding: most likely token id at every frame.
        token_ids = torch.argmax(logits, dim=-1)
        decoded = PROCESSOR.batch_decode(token_ids)
        if decoded:
            return decoded[0]
        return ""
    except Exception as e:
        print(f"Error during transcription: {e}")
        return "[Transcription Error]"
def split_audio_file(file_path, chunk_duration_s=CHUNK_DURATION_S, output_dir=None):
    """Split an audio file into consecutive WAV chunks.

    Args:
        file_path: Audio file to split; loaded with ``librosa.load`` at its
            native sample rate (``sr=None``).
        chunk_duration_s: Length of each chunk in seconds. Must correspond to
            at least one sample.
        output_dir: Directory that receives ``split_<n>.wav`` files. When
            ``None`` a warning is printed and the current directory is used.

    Returns:
        The chunk file paths in playback order, or ``[]`` if anything fails
        (the error is printed). The final chunk may be shorter than
        *chunk_duration_s*.
    """
    try:
        speech, sample_rate = librosa.load(file_path, sr=None)
        chunk_length_samples = int(chunk_duration_s * sample_rate)
        if chunk_length_samples <= 0:
            # A zero-length step would never advance through the samples.
            raise ValueError(
                f"chunk_duration_s must cover at least one sample, got {chunk_duration_s!r}"
            )
        if output_dir is None:
            print("Warning: output_dir not provided to split_audio_file. Saving to current dir.")
            output_dir = "."
        os.makedirs(output_dir, exist_ok=True)

        output_files = []
        chunk_starts = range(0, len(speech), chunk_length_samples)
        for counter, start in enumerate(chunk_starts, start=1):
            # Slicing past the end is clamped, so the last chunk is simply shorter.
            block = speech[start:start + chunk_length_samples]
            out_filename = os.path.join(output_dir, f"split_{counter}.wav")
            sf.write(out_filename, block, sample_rate)
            output_files.append(out_filename)
        return output_files
    except Exception as e:
        print(f"Error splitting file {file_path}: {e}")
        return []
def format_time(seconds_total, chunk_duration_s=None):
    """Format a chunk's start offset as ``"HHh MMm SS-EEs"``.

    Args:
        seconds_total: Start offset of the chunk, in seconds from the
            beginning of the audio. Fractions are floored.
        chunk_duration_s: Integer chunk length in seconds used to compute the
            end of the displayed range. Defaults to the module-level
            ``CHUNK_DURATION_S``.

    Returns:
        A string such as ``"01h 02m 05-10s"``. The end value is the start
        second plus the chunk length, so it can read ``60`` or more for a
        chunk that crosses a minute boundary.
    """
    if chunk_duration_s is None:
        chunk_duration_s = CHUNK_DURATION_S
    whole_seconds = math.floor(seconds_total)
    hours, remainder = divmod(whole_seconds, 3600)
    minutes, seconds_start = divmod(remainder, 60)
    seconds_end = seconds_start + chunk_duration_s
    return f"{hours:02d}h {minutes:02d}m {seconds_start:02d}-{seconds_end:02d}s"
# --- Main Processing Logic --- | |
def _analyze_short_clip(audio_file_path, duration):
    """Transcribe a clip that fits in a single chunk and report flagged words."""
    waveform = resample_audio(audio_file_path)
    if waveform is None:
        return "Error: Could not resample audio."
    transcription = transcribe_chunk(waveform)
    # Spaces are removed before matching so words split by the decoder still hit.
    flagged = check_profanity(transcription.replace(' ', ''))
    if not flagged:
        return f"No profanity found in short audio.\n(Full: '{transcription}')"
    time_str = f"00h 00m 00-{math.ceil(duration):02d}s"
    return f"Found in short audio ({time_str}): {', '.join(flagged)}\n(Full: '{transcription}')"


def _analyze_chunked_audio(audio_file_path):
    """Split a longer file into chunks, transcribe each and build the report."""
    failure_markers = ("[Resample Error]", "[Transcription Error]")
    findings = []
    segment_texts = []
    with tempfile.TemporaryDirectory() as work_dir:
        chunk_paths = split_audio_file(audio_file_path, CHUNK_DURATION_S, output_dir=work_dir)
        if not chunk_paths:
            return "Error: Failed to split audio file."
        for index, chunk_path in enumerate(chunk_paths):
            waveform = resample_audio(chunk_path)
            if waveform is None:
                # Chunks that cannot be loaded contribute nothing to the report.
                print(f"Warning: Could not resample chunk {chunk_path}, skipping.")
                continue
            transcription = transcribe_chunk(waveform)
            flagged = check_profanity(transcription.replace(' ', ''))
            if transcription not in failure_markers:
                segment_texts.append(transcription)
            if flagged:
                time_str = format_time(index * CHUNK_DURATION_S)
                findings.append(
                    f"Found at {time_str}: {', '.join(flagged)}\n(Segment: '{transcription}')\n---\n"
                )

    report = "".join(findings) if findings else "No profanity found in any segment.\n"
    if segment_texts:
        report += f"\nFull approximate transcription:\n{' '.join(segment_texts)}"
    elif not findings:
        # Nothing flagged and nothing transcribed successfully.
        report = "No profanity found and could not generate full transcription."
    return report


def process_audio_file(audio_file_path):
    """Transcribe an audio file and report any flagged words it contains.

    Files no longer than ``CHUNK_DURATION_S`` seconds are transcribed in one
    pass; longer files are split into chunks that are transcribed one by one.

    Args:
        audio_file_path: Path to the audio file to analyse.

    Returns:
        A human-readable report string. Problems (missing file, model not
        loaded, processing failure) are reported in the returned string
        rather than raised.
    """
    if not audio_file_path or not os.path.exists(audio_file_path):
        return "Error: Audio file not found or path is invalid."
    if MODEL is None or PROCESSOR is None:
        return "Error: Transcription model not loaded. Cannot process audio."
    try:
        duration = librosa.get_duration(path=audio_file_path)
        if duration <= CHUNK_DURATION_S:
            report = _analyze_short_clip(audio_file_path, duration)
        else:
            report = _analyze_chunked_audio(audio_file_path)
        if not report:
            return "Processing complete. No specific findings or transcription available."
        return report.strip()
    except Exception as e:
        print(f"Error processing audio file {audio_file_path}: {e}")
        return f"An unexpected error occurred: {e}"
# --- Gradio Interface Callbacks (youtube_loader_and_process, twitch_loader_and_process) ---
# Each callback downloads the audio, runs process_audio_file on it and returns
# (report_text, audio_path_or_None) for the Gradio outputs.
def youtube_loader_and_process(youtube_link):
    """Download the audio of a YouTube video and analyse it.

    Args:
        youtube_link: URL of the video. Falsy values are rejected.

    Returns:
        A ``(report_text, audio_path)`` tuple. ``audio_path`` is the
        downloaded temporary file (kept on disk so the Gradio audio component
        can play it) or ``None`` when no usable file was produced.
    """
    if not youtube_link:
        return "Please provide a YouTube link.", None
    downloaded_file_path = None
    download_finished = False
    try:
        print(f"Downloading YouTube video: {youtube_link}")
        yt = YouTube(str(youtube_link))
        audio_stream = yt.streams.filter(only_audio=True).first()
        if not audio_stream:
            return "No audio stream found for this YouTube video.", None
        # Reserve a unique path and close the handle before pytube writes to
        # it; writing to a file that is still open elsewhere fails on some
        # platforms.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_audio_file:
            downloaded_file_path = tmp_audio_file.name
        audio_stream.download(filename=downloaded_file_path)
        download_finished = True
        print(f"Downloaded YouTube audio to: {downloaded_file_path}")
        results = process_audio_file(downloaded_file_path)
        return results, downloaded_file_path
    except Exception as e:
        print(f"Error downloading or processing YouTube link: {e}")
        if downloaded_file_path and not download_finished:
            # The reserved file is empty or partial: remove it rather than
            # leaking it or handing Gradio an unplayable file.
            try:
                os.remove(downloaded_file_path)
            except OSError as cleanup_error:
                print(f"Could not remove temporary file {downloaded_file_path}: {cleanup_error}")
            downloaded_file_path = None
        if downloaded_file_path and os.path.exists(downloaded_file_path):
            return f"Error: {e}", downloaded_file_path
        return f"Error: {e}", None
    # Successfully downloaded files are intentionally not deleted here: the
    # Gradio Audio component still needs them. Consider a cleanup strategy for
    # long-running servers.
def twitch_loader_and_process(twitch_link_or_id):
    """Download the audio of a Twitch VOD with ``twitch-dl`` and analyse it.

    Args:
        twitch_link_or_id: VOD URL or numeric VOD id. Falsy values are
            rejected, as are values starting with ``-`` (they could be parsed
            as command-line options by ``twitch-dl``).

    Returns:
        A ``(report_text, audio_path)`` tuple. ``audio_path`` is a temporary
        copy of the download that outlives this call so Gradio can play it,
        or ``None`` when no file was produced.
    """
    if not twitch_link_or_id:
        return "Please provide a Twitch link or VOD ID.", None
    vod_reference = str(twitch_link_or_id).strip()
    if not vod_reference:
        return "Please provide a Twitch link or VOD ID.", None
    if vod_reference.startswith("-"):
        return "Error: Invalid Twitch link or VOD ID.", None

    final_audio_path_for_gradio = None
    try:
        print(f"Downloading Twitch VOD: {vod_reference}")
        with tempfile.TemporaryDirectory() as temp_dir:
            # twitch-dl output templates use {placeholder} syntax; the
            # youtube-dl style "%(format)s" is not substituted.
            output_template = os.path.join(temp_dir, "twitch_audio") + ".{format}"
            command = [
                "twitch-dl", "download", "-q", "audio_only",
                vod_reference, "--output", output_template,
            ]
            print(f"Executing: {' '.join(command)}")
            try:
                process_result = subprocess.run(command, capture_output=True, text=True, check=False)
            except FileNotFoundError:
                # Raised when the twitch-dl executable itself cannot be found.
                return "Error: `twitch-dl` command not found. Please ensure it's installed and in your PATH.", None
            if process_result.returncode != 0:
                print(f"twitch-dl error: {process_result.stderr}")
                return f"Error downloading Twitch VOD: {process_result.stderr}", None

            downloaded_files = glob(os.path.join(temp_dir, "twitch_audio.*"))
            if not downloaded_files:
                # Fallback: the tool may have picked its own file name, so
                # accept any media file found in the temporary directory.
                media_suffixes = ('.mkv', '.mp4', '.ts', '.aac', '.wav', '.mp3')
                downloaded_files = [
                    candidate
                    for suffix in media_suffixes
                    for candidate in glob(os.path.join(temp_dir, f"*{suffix}"))
                ]
            if not downloaded_files:
                print(f"Twitch download completed, but output file not found in {temp_dir}. Check twitch-dl output naming.")
                print(f"stdout: {process_result.stdout}")
                print(f"stderr: {process_result.stderr}")
                return "Twitch download completed, but output file not found.", None

            downloaded_file_path = downloaded_files[0]
            print(f"Downloaded Twitch audio to: {downloaded_file_path}")
            results = process_audio_file(downloaded_file_path)

            if os.path.exists(downloaded_file_path):
                # temp_dir is deleted when the `with` block ends, so copy the
                # audio to a file that persists for the Gradio component.
                # Reserve the path first and copy after the handle is closed.
                suffix = os.path.splitext(downloaded_file_path)[1]
                with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as persistent_tmp_file:
                    persistent_path = persistent_tmp_file.name
                shutil.copy2(downloaded_file_path, persistent_path)
                final_audio_path_for_gradio = persistent_path
        return results, final_audio_path_for_gradio
    except Exception as e:
        print(f"Error processing Twitch link: {e}")
        return f"An unexpected error occurred: {e}", None
# --- Gradio UI Definition ---
# Builds the tabbed interface: one tab per audio source (uploaded file,
# YouTube link, Twitch VOD), each with a results textbox and a submit button.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Audio Content Analyzer")
    gr.Markdown(
        f"Transcribes audio and checks for specific words. Processes audio in {CHUNK_DURATION_S}-second chunks."
    )
    if MODEL is None or PROCESSOR is None:
        # gr.Warning is meant to be called from inside an event handler;
        # called while the layout is being built it does not show up on the
        # page, so render a visible notice instead.
        gr.Markdown("**Warning:** Transcription model failed to load. Transcription features will not work.")

    with gr.Tabs():
        # The microphone tab is currently disabled. Re-enable it together
        # with its click handler at the bottom of this block.
        # with gr.TabItem("From your voice (Microphone)"):
        #     with gr.Column():
        #         voice_input = gr.Audio(sources=["microphone"], type="filepath", label="Record or Upload Microphone Audio")
        #         voice_output_text = gr.Textbox(label="Analysis Results", lines=10, interactive=False)
        #         submit_voice_button = gr.Button("Submit Microphone Audio")
        with gr.TabItem("From an Audio File"):
            with gr.Column():
                file_input = gr.Audio(sources=["upload"], type="filepath", label="Upload Audio File (.wav, .mp3, etc.)")
                # Offer the audio files found in EXAMPLE_AUDIO_DIR as
                # one-click examples; show a hint when there are none.
                if not os.path.exists(EXAMPLE_AUDIO_DIR):
                    gr.Markdown(f"_(Optional: Create a directory named '{EXAMPLE_AUDIO_DIR}' and add audio files to it for quick examples.)_")
                else:
                    example_files_list = []
                    for ext in ("*.wav", "*.mp3", "*.flac", "*.m4a", "*.ogg"):  # Common audio extensions
                        example_files_list.extend(glob(os.path.join(EXAMPLE_AUDIO_DIR, ext)))
                    if example_files_list:
                        gr.Examples(
                            examples=sorted(example_files_list),  # Sort for consistent order
                            inputs=file_input,  # Clicking an example populates this input
                            label="Or select an example audio file:",
                        )
                    else:
                        gr.Markdown(f"_(No example audio files found in '{EXAMPLE_AUDIO_DIR}'. Add some .wav, .mp3, etc. files!)_")
                file_output_text = gr.Textbox(label="Analysis Results", lines=10, interactive=False)
                submit_file_button = gr.Button("Submit Audio File")
        with gr.TabItem("From YouTube Link"):
            with gr.Column():
                youtube_input_link = gr.Textbox(label="YouTube Video Link", placeholder="e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ")
                youtube_output_audio = gr.Audio(label="Downloaded Audio", type="filepath", interactive=False)
                youtube_output_text = gr.Textbox(label="Analysis Results", lines=10, interactive=False)
                submit_youtube_button = gr.Button("Fetch and Analyze YouTube Audio")
        with gr.TabItem("From Twitch VOD"):
            with gr.Column():
                twitch_input_link = gr.Textbox(label="Twitch VOD Link or ID", placeholder="e.g., https://www.twitch.tv/videos/123456789 or 123456789")
                twitch_output_audio = gr.Audio(label="Downloaded Audio", type="filepath", interactive=False)
                twitch_output_text = gr.Textbox(label="Analysis Results", lines=10, interactive=False)
                submit_twitch_button = gr.Button("Fetch and Analyze Twitch VOD")

    # --- Button Click Handlers ---
    # NOTE: the microphone handler stays disabled while its tab is commented
    # out; binding it without those components raises NameError at import.
    # submit_voice_button.click(
    #     fn=process_audio_file,
    #     inputs=[voice_input],
    #     outputs=[voice_output_text],
    #     api_name="analyze_microphone_audio"
    # )
    submit_file_button.click(
        fn=process_audio_file,
        inputs=[file_input],
        outputs=[file_output_text],
        api_name="analyze_uploaded_audio",
    )
    submit_youtube_button.click(
        fn=youtube_loader_and_process,
        inputs=[youtube_input_link],
        outputs=[youtube_output_text, youtube_output_audio],
        api_name="analyze_youtube_audio",
    )
    submit_twitch_button.click(
        fn=twitch_loader_and_process,
        inputs=[twitch_input_link],
        outputs=[twitch_output_text, twitch_output_audio],
        api_name="analyze_twitch_audio",
    )
if __name__ == "__main__":
    # Make sure the example-audio directory is there before launching, so
    # users have an obvious place to drop sample files.
    if os.path.exists(EXAMPLE_AUDIO_DIR):
        print(f"Example audio directory '{EXAMPLE_AUDIO_DIR}' already exists. Add audio files there if you haven't.")
    else:
        try:
            os.makedirs(EXAMPLE_AUDIO_DIR)
            print(f"Created directory: {EXAMPLE_AUDIO_DIR}. Please add some audio files to it for examples.")
        except OSError as mkdir_error:
            # A missing examples directory is not fatal; the app still starts.
            print(f"Could not create directory {EXAMPLE_AUDIO_DIR}: {mkdir_error}")
    demo.launch(share=True, debug=True)