import os
import uuid
import logging
from pathlib import Path
from typing import Optional
# --- FastAPI Imports ---
from fastapi import FastAPI, Request, HTTPException, BackgroundTasks, Body
from fastapi.responses import JSONResponse, FileResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel, HttpUrl # Use HttpUrl for URL validation
# --- yt-dlp Import ---
from yt_dlp import YoutubeDL
# --- Logging Configuration ---
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# --- Constants ---
DOWNLOAD_DIR = Path('downloads') # Use pathlib for paths
COOKIE_FILE = 'www.youtube.com_cookies.txt' # Define cookie file path
# --- Create Download Directory ---
DOWNLOAD_DIR.mkdir(parents=True, exist_ok=True)
# --- FastAPI App Initialization ---
app = FastAPI(
title="YouTube Downloader API",
description="API to fetch info and download audio/video from YouTube using yt-dlp.",
version="1.0.0",
)
# --- Mount Static Files Directory ---
# This allows serving files directly from the 'downloads' directory
# under the path '/downloads'
app.mount("/downloads", StaticFiles(directory=DOWNLOAD_DIR), name="downloads")
# --- Pydantic Models for Request/Response Validation ---
class UrlRequest(BaseModel):
"""Request model for endpoints needing just a URL."""
url: HttpUrl # Ensures the input is a valid URL
class MaxDownloadRequest(BaseModel):
"""Request model for the /max endpoint."""
url: HttpUrl
quality: Optional[str] = 'best' # e.g., '1080p', '720p', 'best'
class InfoResponse(BaseModel):
"""Response model for the /get-info endpoint."""
title: Optional[str] = None
thumbnail: Optional[str] = None
duration: Optional[float] = None # Duration is often float/int
channel: Optional[str] = None
class DownloadResponse(BaseModel):
"""Response model for download endpoints."""
download_url: str
filename: str
message: Optional[str] = None
class ErrorResponse(BaseModel):
"""Standard error response model."""
detail: str
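# Illustrative request/response shapes for the models above (example values only):
#   POST /get-info   body: {"url": "https://www.youtube.com/watch?v=..."}
#   200 response:    {"title": "...", "thumbnail": "...", "duration": 212.0, "channel": "..."}
#   POST /download   body: {"url": "https://www.youtube.com/watch?v=..."}
#   200 response:    {"download_url": "http://<host>/downloads/<uuid>.mp3",
#                     "filename": "<uuid>.mp3", "message": null}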
# --- Helper Function for Download ---
def perform_download(ydl_opts: dict, url: str, file_path: Path):
"""Synchronously downloads using yt-dlp."""
try:
logger.info(f"Starting download for URL: {url} with options: {ydl_opts}")
# Ensure the output template uses the full file path stem
ydl_opts['outtmpl'] = str(file_path.with_suffix('.%(ext)s'))
with YoutubeDL(ydl_opts) as ydl:
ydl.extract_info(url, download=True)
logger.info(f"Download finished successfully for URL: {url}")
# Find the actual downloaded file (extension might change)
downloaded_files = list(DOWNLOAD_DIR.glob(f"{file_path.stem}.*"))
if not downloaded_files:
logger.error(f"Download completed but no file found for stem: {file_path.stem}")
raise RuntimeError(f"Could not find downloaded file for {url}")
# Return the path of the first matching file
return downloaded_files[0]
except Exception as e:
logger.error(f"yt-dlp download failed for URL {url}: {e}", exc_info=True)
# Clean up potentially incomplete file if it exists
if file_path.exists():
try:
os.remove(file_path)
logger.info(f"Removed incomplete file: {file_path}")
except OSError as rm_err:
logger.error(f"Error removing incomplete file {file_path}: {rm_err}")
raise # Re-raise the exception to be caught by the endpoint handler
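# --- Optional: Non-blocking download (sketch) ---
# perform_download() is synchronous, so calling it directly inside the async
# endpoints below blocks the event loop for the duration of the download. One
# possible alternative (an untested sketch, not wired into the endpoints) is to
# offload the call to a worker thread via asyncio.to_thread (Python 3.9+):
#
#   import asyncio
#
#   async def perform_download_async(ydl_opts: dict, url: str, file_path: Path) -> Path:
#       # Runs the blocking yt-dlp call in a thread pool without blocking the loop.
#       return await asyncio.to_thread(perform_download, ydl_opts, url, file_path)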
# --- API Endpoints ---
@app.get("/")
async def root():
"""Root endpoint providing basic API info."""
# Differs from Flask's 404, provides a simple informational message.
return {"message": "YouTube Downloader API. Use /docs for documentation."}
@app.post(
"/get-info",
response_model=InfoResponse,
responses={500: {"model": ErrorResponse}}
)
async def get_info(payload: UrlRequest = Body(...)):
"""
Extracts video information (title, thumbnail, duration, channel) from a given URL.
"""
logger.info(f"Received /get-info request for URL: {payload.url}")
ydl_opts = {}
if os.path.exists(COOKIE_FILE):
ydl_opts['cookiefile'] = COOKIE_FILE
logger.info("Using cookie file.")
else:
logger.warning(f"Cookie file '{COOKIE_FILE}' not found. Some videos might require login/cookies.")
try:
with YoutubeDL(ydl_opts) as ydl:
# Extract info without downloading
info = ydl.extract_info(str(payload.url), download=False)
# Safely get info fields
return InfoResponse(
title=info.get('title'),
thumbnail=info.get('thumbnail'),
duration=info.get('duration'),
channel=info.get('channel')
)
except Exception as e:
logger.error(f"Error fetching info for {payload.url}: {e}", exc_info=True)
raise HTTPException(status_code=500, detail=f"Failed to extract video info: {str(e)}")
@app.post(
"/download",
response_model=DownloadResponse,
responses={400: {"model": ErrorResponse}, 500: {"model": ErrorResponse}}
)
async def download_audio(request: Request, payload: UrlRequest = Body(...)):
"""
Downloads the audio track of a video as an MP3 file (128kbps).
"""
logger.info(f"Received /download (audio) request for URL: {payload.url}")
# Generate unique filename components
unique_id = str(uuid.uuid4())
# Define the base path without extension (yt-dlp adds it)
file_path_stem = DOWNLOAD_DIR / unique_id
# --- yt-dlp Options for Audio Download ---
ydl_opts = {
'format': '140/m4a/bestaudio/best', # Prioritize format 140 (m4a), fallback to best audio
'outtmpl': str(file_path_stem.with_suffix('.%(ext)s')), # Output filename template
'postprocessors': [{
'key': 'FFmpegExtractAudio',
'preferredcodec': 'mp3', # Convert to MP3
'preferredquality': '128', # Set audio quality
}],
'noplaylist': True, # Avoid downloading entire playlists
'quiet': False, # Show yt-dlp output in logs
'progress_hooks': [lambda d: logger.debug(f"Download progress: {d['status']} - {d.get('_percent_str', '')}")], # Log progress
}
if os.path.exists(COOKIE_FILE):
ydl_opts['cookiefile'] = COOKIE_FILE
logger.info("Using cookie file for audio download.")
else:
logger.warning(f"Cookie file '{COOKIE_FILE}' not found for audio download.")
try:
# Perform the download synchronously
final_file_path = perform_download(ydl_opts, str(payload.url), file_path_stem)
final_filename = final_file_path.name
# Construct the full download URL using request base URL
# request.base_url gives http://<host>:<port>/
# We need to append the static path 'downloads' and the filename
download_url = f"{str(request.base_url).rstrip('/')}/downloads/{final_filename}"
logger.info(f"Audio download complete for {payload.url}. URL: {download_url}")
return DownloadResponse(download_url=download_url, filename=final_filename)
except Exception as e:
logger.error(f"Audio download failed for {payload.url}: {e}", exc_info=True)
raise HTTPException(status_code=500, detail=f"Audio download failed: {str(e)}")
@app.post(
"/max",
response_model=DownloadResponse,
responses={400: {"model": ErrorResponse}, 500: {"model": ErrorResponse}}
)
async def download_video_max_quality(request: Request, payload: MaxDownloadRequest = Body(...)):
"""
Downloads the video in the specified quality (e.g., '1080p', '720p') or 'best' available.
Merges video and audio into an MP4 container.
"""
logger.info(f"Received /max (video) request for URL: {payload.url} with quality: {payload.quality}")
# Generate unique filename components
unique_id = str(uuid.uuid4())
# Define the base path without extension
file_path_stem = DOWNLOAD_DIR / unique_id
# --- Determine yt-dlp Format Selector based on Quality ---
quality = payload.quality.lower() if payload.quality else 'best'
if quality == 'best':
format_selector = 'bestvideo+bestaudio/best' # Best video and audio, merged if possible
elif quality.endswith('p'):
try:
height = int(quality[:-1]) # Extract height like 1080 from '1080p'
# Select best video up to specified height + best audio, fallback to best overall up to height
format_selector = f'bestvideo[height<={height}]+bestaudio/best[height<={height}]'
except ValueError:
logger.warning(f"Invalid quality format: {payload.quality}. Falling back to 'best'.")
format_selector = 'bestvideo+bestaudio/best'
else:
logger.warning(f"Unrecognized quality value: {payload.quality}. Falling back to 'best'.")
format_selector = 'bestvideo+bestaudio/best'
logger.info(f"Using format selector: '{format_selector}'")
# --- yt-dlp Options for Video Download ---
ydl_opts = {
'format': format_selector,
'outtmpl': str(file_path_stem.with_suffix('.%(ext)s')), # Output filename template
'merge_output_format': 'mp4', # Merge into MP4 container if separate streams are downloaded
'noplaylist': True,
'quiet': False,
'progress_hooks': [lambda d: logger.debug(f"Download progress: {d['status']} - {d.get('_percent_str', '')}")],
}
if os.path.exists(COOKIE_FILE):
ydl_opts['cookiefile'] = COOKIE_FILE
logger.info("Using cookie file for video download.")
else:
logger.warning(f"Cookie file '{COOKIE_FILE}' not found for video download.")
try:
# Perform the download synchronously
final_file_path = perform_download(ydl_opts, str(payload.url), file_path_stem)
final_filename = final_file_path.name
# Construct the full download URL
download_url = f"{str(request.base_url).rstrip('/')}/downloads/{final_filename}"
logger.info(f"Video download complete for {payload.url}. URL: {download_url}")
return DownloadResponse(download_url=download_url, filename=final_filename)
except Exception as e:
logger.error(f"Video download failed for {payload.url}: {e}", exc_info=True)
raise HTTPException(status_code=500, detail=f"Video download failed: {str(e)}")
# --- Optional: Add cleanup for old files (Example using BackgroundTasks) ---
# This is a basic example; a more robust solution might involve timestamps or a dedicated cleanup script.
async def cleanup_old_files(directory: Path, max_age_seconds: int):
"""Removes files older than max_age_seconds in the background."""
import time
now = time.time()
count = 0
try:
for item in directory.iterdir():
if item.is_file():
try:
if now - item.stat().st_mtime > max_age_seconds:
os.remove(item)
logger.info(f"Cleaned up old file: {item.name}")
count += 1
except OSError as e:
logger.error(f"Error removing file {item}: {e}")
if count > 0:
logger.info(f"Background cleanup finished. Removed {count} old files.")
else:
logger.info("Background cleanup finished. No old files found.")
except Exception as e:
logger.error(f"Error during background file cleanup: {e}", exc_info=True)
@app.post("/trigger-cleanup")
async def trigger_cleanup(background_tasks: BackgroundTasks):
"""Manually trigger a cleanup of files older than 1 day."""
logger.info("Triggering background cleanup of old download files.")
# Clean files older than 1 day (86400 seconds)
background_tasks.add_task(cleanup_old_files, DOWNLOAD_DIR, 86400)
return {"message": "Background cleanup task scheduled."}
# --- How to Run (using uvicorn) ---
# Save this code as srv.py
# Install dependencies: pip install fastapi uvicorn yt-dlp pydantic
# (Note: the ffmpeg binary must be installed on your system for audio extraction and merging)
# Run from terminal: uvicorn srv:app --reload
#
# You will also need a 'www.youtube.com_cookies.txt' file in the same directory
# if you need to download age-restricted or private videos.
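# --- Example client usage (sketch) ---
# With the server running locally on port 8000, the endpoints can be exercised
# with a small script like this (assumes the 'requests' package and an example
# video URL; adjust host/port as needed):
#
#   import requests
#
#   BASE = "http://127.0.0.1:8000"
#   video_url = "https://www.youtube.com/watch?v=..."  # replace with a real URL
#
#   info = requests.post(f"{BASE}/get-info", json={"url": video_url}).json()
#   print(info["title"], info["duration"])
#
#   audio = requests.post(f"{BASE}/download", json={"url": video_url}).json()
#   print("MP3 available at:", audio["download_url"])
#
#   video = requests.post(f"{BASE}/max", json={"url": video_url, "quality": "720p"}).json()
#   print("MP4 available at:", video["download_url"])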