import modal
import os

# Create Modal application
app = modal.App(name="gradio-mcp-server")

# Try to get Hugging Face token from Modal secrets (required for speaker diarization)
try:
    hf_secret = modal.Secret.from_name("huggingface-secret")
    print("✅ Found Hugging Face secret configuration")
except Exception:
    hf_secret = None
    print("⚠️ Hugging Face secret not found, speaker diarization will be disabled")
# Create mounted volume
volume = modal.Volume.from_name("cache-volume", create_if_missing=True)
cache_dir = "/root/cache"

# Model preloading function
def download_models() -> None:
    """Download and cache Whisper and speaker diarization models"""
    import whisper
    import os
    from pathlib import Path

    # Create model cache directory
    model_cache_dir = Path("/model")
    model_cache_dir.mkdir(exist_ok=True)

    print("📥 Downloading Whisper turbo model...")
    # Download and cache Whisper turbo model
    whisper_model = whisper.load_model("turbo", download_root="/model")
    print("✅ Whisper turbo model downloaded and cached")

    # Download speaker diarization models if HF token is available
    if os.environ.get("HF_TOKEN"):
        try:
            print("📥 Downloading speaker diarization models...")
            from pyannote.audio import Pipeline, Model
            from pyannote.audio.core.inference import Inference
            import torch

            # Set proper cache directory for pyannote
            os.environ["PYANNOTE_CACHE"] = "/model/speaker-diarization"

            # Download and cache speaker diarization pipeline
            # This will automatically cache to the PYANNOTE_CACHE directory
            pipeline = Pipeline.from_pretrained(
                "pyannote/speaker-diarization-3.1",
                use_auth_token=os.environ["HF_TOKEN"],
                cache_dir="/model/speaker-diarization",
            )

            # Preload speaker embedding model for speaker identification
            print("📥 Downloading speaker embedding model...")
            embedding_model = Model.from_pretrained(
                "pyannote/embedding",
                use_auth_token=os.environ["HF_TOKEN"],
                cache_dir="/model/speaker-embedding",
            )

            # Set device for models
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            embedding_model.to(device)
            embedding_model.eval()

            # Create inference object for embedding extraction
            inference = Inference(embedding_model, window="whole")
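            # (Assumed usage, not executed at build time: with window="whole",
            # calling inference on an audio file yields one embedding for the file.)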
            # Verify the pipeline works
            print("🧪 Testing speaker diarization pipeline...")

            # Create a simple marker file to indicate successful download
            import json

            speaker_dir = Path("/model/speaker-diarization")
            speaker_dir.mkdir(exist_ok=True, parents=True)
            embedding_dir = Path("/model/speaker-embedding")
            embedding_dir.mkdir(exist_ok=True, parents=True)
            config = {
                "model_name": "pyannote/speaker-diarization-3.1",
                "embedding_model_name": "pyannote/embedding",
                "cached_at": str(speaker_dir),
                "embedding_cached_at": str(embedding_dir),
                "cache_complete": True,
                "embedding_cache_complete": True,
                "pyannote_cache_env": "/model/speaker-diarization",
                "device": str(device),
            }
            with open(speaker_dir / "download_complete.json", "w") as f:
                json.dump(config, f)
            print("✅ Speaker diarization and embedding models downloaded and cached")
        except Exception as e:
            print(f"⚠️ Failed to download speaker diarization models: {e}")
    else:
        print("⚠️ No HF_TOKEN found, skipping speaker diarization model download")


# Create image environment with model preloading
image = modal.Image.debian_slim(python_version="3.11").apt_install(
    # Basic tools
    "ffmpeg",
    "wget",
    "curl",
    "unzip",
    "gnupg2",
    "git",  # Required by Whisper
    # Chrome dependencies
    "libglib2.0-0",
    "libnss3",
    "libatk-bridge2.0-0",
    "libdrm2",
    "libxkbcommon0",
    "libxcomposite1",
    "libxdamage1",
    "libxrandr2",
    "libgbm1",
    "libxss1",
    "libasound2",
).run_commands(
    # Download and install Chrome directly (faster method)
    "wget -q https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb",
    "apt-get install -y ./google-chrome-stable_current_amd64.deb || apt-get install -y -f",
    "rm google-chrome-stable_current_amd64.deb",
).pip_install(
    # Web frameworks and basic libraries
    "gradio>=5.31.0",
    "fastapi",
    "pydantic",
    "python-dotenv",
    # MCP related
    "mcp[cli]",
    "fastmcp>=2.7.0",
    "starlette",
    # Network and parsing
    "beautifulsoup4",
    "selenium",
    "requests",
    # Whisper and audio processing
    "git+https://github.com/openai/whisper.git",
    "ffmpeg-python",
    "torchaudio==2.1.0",
    "numpy<2",
    # Audio processing dependencies
    "librosa",
    "soundfile",
    # Other Whisper ecosystem dependencies
    "dacite",
    "jiwer",
    "pandas",
    "loguru==0.6.0",
    # GraphQL client (if needed)
    "gql[all]~=3.0.0a5",
    # Speaker diarization
    "pyannote.audio==3.1.0",
    # System monitoring
    "psutil",
).run_function(
    download_models,
    secrets=[hf_secret] if hf_secret else [],
)
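
# Note: run_function executes download_models while the image is being built,
# so the weights under /model are baked into the image rather than fetched at runtime.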

# Mount the local source tree into the image
image = image.add_local_dir("../src", remote_path="/root/src")
secrets = [hf_secret] if hf_secret else []

# ==================== Modal Endpoints Configuration ====================
@app.function(
    image=image,
    volumes={cache_dir: volume},
    cpu=4,  # Increased CPU for better performance
    memory=8192,  # 8GB memory for stable transcription
    gpu="A10G",
    timeout=1800,  # 30 minutes timeout for speaker diarization support
    scaledown_window=40,  # 40 seconds idle before scaling down
    secrets=secrets,
)
@modal.fastapi_endpoint(method="POST", label="transcribe-audio-chunk-endpoint")
def transcribe_audio_chunk_endpoint(request_data: dict):
"""FastAPI endpoint for transcribing a single audio chunk (for distributed processing)"""
import sys
sys.path.append('/root')
from src.services.modal_transcription_service import ModalTranscriptionService
modal_service = ModalTranscriptionService(cache_dir="/root/cache", use_direct_modal_calls=True)
return modal_service.process_chunk_request(request_data)
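
# A hedged usage sketch for the chunk endpoint. The URL prefix and the payload
# fields below are hypothetical placeholders; the real request schema is defined
# by ModalTranscriptionService.process_chunk_request in src/services.
#
#   import requests
#
#   resp = requests.post(
#       "https://<workspace>--transcribe-audio-chunk-endpoint.modal.run",
#       json={"audio_chunk": "<base64 audio>", "chunk_index": 0},  # hypothetical fields
#       timeout=1800,
#   )
#   print(resp.json())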

@app.function(
    image=image,
    cpu=2,  # Increased CPU for better health check performance
    memory=2048,  # 2GB memory for stability
    timeout=300,  # 5 minutes timeout for health checks
    scaledown_window=600,  # 10 minutes before scaling down
    secrets=secrets,
)
@modal.fastapi_endpoint(method="GET", label="health-check-endpoint")
def health_check_endpoint():
"""Health check endpoint to verify service status"""
import sys
sys.path.append('/root')
from src.services.health_service import HealthService
health_service = HealthService()
return health_service.get_health_status()
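
# A minimal deployment sketch (assumes this file is named deploy.py; substitute
# your Modal workspace name for <workspace>):
#
#   modal deploy deploy.py
#   curl https://<workspace>--health-check-endpoint.modal.run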