Spaces:

AryanSingh04
/

video-summ-api

Sleeping

App Files Files Community

AryanSingh04 committed 16 days ago

Commit

dc941d3

verified ·

1 Parent(s): 4856cf3

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -28

app.py CHANGED Viewed

@@ -7,41 +7,42 @@ from fastapi import FastAPI, File, UploadFile
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse
-# Redirect caches to writable dirs on Spaces (pre-import!)
-os.environ["HF_HOME"] = "/tmp/huggingface"
-os.environ["XDG_CACHE_HOME"] = "/tmp"
 from faster_whisper import WhisperModel
 from transformers import pipeline
-app = FastAPI(title="Video → Title & Summary (OSS)")
-# CORS so your frontend can call the API from anywhere
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["*"],  # tighten if you have a fixed domain
     allow_credentials=True,
     allow_methods=["*"],
     allow_headers=["*"],
 )
-# ---------- Model loading (once) ----------
 print("Loading models...")
-# Whisper: choose "tiny", "base", or "small". "small" = better accuracy, slower.
 WHISPER_SIZE = os.getenv("WHISPER_SIZE", "small")
-whisper_model = WhisperModel(WHISPER_SIZE, device="cpu", compute_type="int8")  # CPU-friendly
-# Summarizer: lightweight & reliable
 summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
-# Title generator (tiny T5). You can upgrade to flan-t5-base if you have more RAM.
 title_gen = pipeline("text2text-generation", model="google/flan-t5-small")
 print("Models loaded.")
-# ---------- Helpers ----------
 def extract_audio_wav_16k_mono(video_path: str) -> str:
-    """Extract 16kHz mono WAV from a video file using ffmpeg."""
     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
         audio_path = tmp.name
     cmd = [
@@ -59,61 +60,66 @@ def extract_audio_wav_16k_mono(video_path: str) -> str:
 def safe_trim(text: str, max_chars: int) -> str:
     if len(text) <= max_chars:
         return text
     return text[:max_chars].rsplit(" ", 1)[0] + "..."
 def summarize_and_title(transcript: str) -> Tuple[str, str]:
-    # Keep inputs modest for small models
-    # (BART CNN handles ~1024 tokens; use char trim for simplicity)
-    trimmed = safe_trim(transcript.strip(), 4000)
-    # Summary (adjust lengths as you like)
     summary = summarizer(
         trimmed,
         max_length=140,
         min_length=40,
-        do_sample=False
     )[0]["summary_text"].strip()
-    # Title (short & catchy)
     title_prompt = (
         "Generate a short, catchy, human-friendly title (<= 10 words) for this content:\n"
         + summary
     )
     title = title_gen(title_prompt, max_new_tokens=16, num_return_sequences=1)[0]["generated_text"].strip()
-    # Post-tidy
     title = title.replace('"', "").replace("\n", " ").strip()
     title = safe_trim(title, 80)
     return title, summary
-# ---------- API ----------
 @app.post("/process_video")
 async def process_video(file: UploadFile = File(...)):
-    """Accepts a video file under `file`, returns title+summary JSON."""
     tmp_video = None
     tmp_audio = None
     try:
-        # Save upload
         suffix = os.path.splitext(file.filename or "")[1] or ".mp4"
         with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tv:
             tmp_video = tv.name
             tv.write(await file.read())
-        # Extract audio
         tmp_audio = extract_audio_wav_16k_mono(tmp_video)
-        # Transcribe (English auto-detect; set language="en" if needed)
         segments, _info = whisper_model.transcribe(tmp_audio)
         transcript = " ".join(seg.text for seg in segments).strip()
         if not transcript:
             return JSONResponse({"error": "No speech detected in the video."}, status_code=400)
-        # Summarize + Title (all open-source, local)
         title, summary = summarize_and_title(transcript)
         return JSONResponse({"title": title, "summary": summary})
@@ -123,7 +129,7 @@ async def process_video(file: UploadFile = File(...)):
     except Exception as e:
         return JSONResponse({"error": str(e)}, status_code=500)
     finally:
-        # Cleanup temp files
         for p in (tmp_audio, tmp_video):
             if p and os.path.exists(p):
                 try:
@@ -134,4 +140,8 @@ async def process_video(file: UploadFile = File(...)):
 @app.get("/")
 def root():
-    return {"ok": True, "message": "POST a video to /process_video with form field 'file'."}

 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse
+# --- Put caches in writable paths for Spaces BEFORE any HF imports ---
+os.environ.setdefault("HF_HOME", "/tmp/huggingface")
+os.environ.setdefault("XDG_CACHE_HOME", "/tmp")
 from faster_whisper import WhisperModel
 from transformers import pipeline
+app = FastAPI(title="Video → Title & Summary (Open Source)")
+# CORS so your React app can call this API from anywhere
 app.add_middleware(
     CORSMiddleware,
+    allow_origins=["*"],            # tighten to your domain in production
     allow_credentials=True,
     allow_methods=["*"],
     allow_headers=["*"],
 )
+# --------- Model loading (once) ---------
 print("Loading models...")
+# Whisper: choose tiny | base | small ; small = better accuracy, slower
 WHISPER_SIZE = os.getenv("WHISPER_SIZE", "small")
+# CPU-friendly: int8 compute; uses ~1–2 GB RAM for "small"
+whisper_model = WhisperModel(WHISPER_SIZE, device="cpu", compute_type="int8")
+# Summarizer: compact & solid
 summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
+# Title generator (tiny T5). You can switch to flan-t5-base if you upgrade hardware.
 title_gen = pipeline("text2text-generation", model="google/flan-t5-small")
 print("Models loaded.")
+# --------- Helpers ---------
 def extract_audio_wav_16k_mono(video_path: str) -> str:
+    """Extract 16kHz mono WAV from a video file using system ffmpeg."""
     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
         audio_path = tmp.name
     cmd = [
 def safe_trim(text: str, max_chars: int) -> str:
     """Trim ``text`` at a word boundary so the result fits in ``max_chars``.

     Surrounding whitespace is stripped first, and ``None`` or an empty
     value is treated as an empty string. Text that already fits is
     returned unchanged. Longer text is cut and suffixed with ``"..."``;
     the ellipsis is counted against ``max_chars`` so the returned string
     is never longer than the limit.

     Args:
         text: The text to shorten.
         max_chars: Maximum length of the returned string.

     Returns:
         The stripped text, shortened with a trailing ``"..."`` if needed.
     """
     text = (text or "").strip()
     if len(text) <= max_chars:
         return text

     ellipsis = "..."
     budget = max_chars - len(ellipsis)
     if budget <= 0:
         # No room for an ellipsis at all: fall back to a hard cut.
         return text[:max(max_chars, 0)]

     head = text[:budget]
     # text[budget] exists because len(text) > max_chars > budget.
     cut_mid_word = not (head[-1].isspace() or text[budget].isspace())
     if cut_mid_word:
         # Drop the partial trailing word, unless the head is one long word.
         pieces = head.rsplit(None, 1)
         if len(pieces) == 2:
             head = pieces[0]
     return head.rstrip() + ellipsis
 def summarize_and_title(transcript: str) -> Tuple[str, str]:
     """Return ``(title, summary)`` for a transcript using the local models.

     The transcript is first cut to about 4000 characters, summarized with
     the module-level ``summarizer`` pipeline, and the summary is then fed
     to the module-level ``title_gen`` pipeline to produce a short title.

     Args:
         transcript: Plain-text transcript of the video.

     Returns:
         A ``(title, summary)`` tuple; the title is at most 80 characters.
     """
     trimmed = safe_trim(transcript, 4000)  # rough character-based guard

     # The character guard only approximates the model's token limit, so
     # also let the tokenizer truncate instead of failing on dense text.
     summary = summarizer(
         trimmed,
         max_length=140,
         min_length=40,
         do_sample=False,
         truncation=True,
     )[0]["summary_text"].strip()

     title_prompt = (
         "Generate a short, catchy, human-friendly title (<= 10 words) for this content:\n"
         + summary
     )
     generated = title_gen(title_prompt, max_new_tokens=16, num_return_sequences=1)
     title = generated[0]["generated_text"].strip()

     # Tidy up: drop quotes and newlines, then cap the length.
     title = title.replace('"', "").replace("\n", " ").strip()
     title = safe_trim(title, 80)
     if not title:
         # The generator produced nothing usable; reuse the summary's opening.
         title = safe_trim(summary, 80)
     return title, summary
+# --------- API ---------
 @app.post("/process_video")
 async def process_video(file: UploadFile = File(...)):
+    """
+    Accepts a video under form field 'file'.
+    Returns JSON: { "title": str, "summary": str }
+    """
     tmp_video = None
     tmp_audio = None
     try:
+        # Save uploaded video
         suffix = os.path.splitext(file.filename or "")[1] or ".mp4"
         with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tv:
             tmp_video = tv.name
             tv.write(await file.read())
+        # Extract audio with ffmpeg
         tmp_audio = extract_audio_wav_16k_mono(tmp_video)
+        # Transcribe (auto language). You can force English via language="en"
         segments, _info = whisper_model.transcribe(tmp_audio)
         transcript = " ".join(seg.text for seg in segments).strip()
         if not transcript:
             return JSONResponse({"error": "No speech detected in the video."}, status_code=400)
+        # Summarize + Title
         title, summary = summarize_and_title(transcript)
         return JSONResponse({"title": title, "summary": summary})
     except Exception as e:
         return JSONResponse({"error": str(e)}, status_code=500)
     finally:
+        # Cleanup
         for p in (tmp_audio, tmp_video):
             if p and os.path.exists(p):
                 try:
 @app.get("/")
 def root():
     """Return a small status payload that says how to call the API."""
     info = dict(
         ok=True,
         message="POST a video to /process_video with form field 'file'.",
         docs="/docs",
     )
     return info