Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -7,41 +7,42 @@ from fastapi import FastAPI, File, UploadFile
|
|
7 |
from fastapi.middleware.cors import CORSMiddleware
|
8 |
from fastapi.responses import JSONResponse
|
9 |
|
10 |
-
#
|
11 |
-
os.environ
|
12 |
-
os.environ
|
13 |
|
14 |
from faster_whisper import WhisperModel
|
15 |
from transformers import pipeline
|
16 |
|
17 |
-
app = FastAPI(title="Video → Title & Summary (
|
18 |
|
19 |
-
# CORS so your
|
20 |
app.add_middleware(
|
21 |
CORSMiddleware,
|
22 |
-
allow_origins=["*"],
|
23 |
allow_credentials=True,
|
24 |
allow_methods=["*"],
|
25 |
allow_headers=["*"],
|
26 |
)
|
27 |
|
28 |
-
#
|
29 |
print("Loading models...")
|
30 |
-
# Whisper: choose
|
31 |
WHISPER_SIZE = os.getenv("WHISPER_SIZE", "small")
|
32 |
-
|
|
|
33 |
|
34 |
-
# Summarizer:
|
35 |
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
|
36 |
|
37 |
-
# Title generator (tiny T5). You can
|
38 |
title_gen = pipeline("text2text-generation", model="google/flan-t5-small")
|
39 |
print("Models loaded.")
|
40 |
|
41 |
|
42 |
-
#
|
43 |
def extract_audio_wav_16k_mono(video_path: str) -> str:
|
44 |
-
"""Extract 16kHz mono WAV from a video file using ffmpeg."""
|
45 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
|
46 |
audio_path = tmp.name
|
47 |
cmd = [
|
@@ -59,61 +60,66 @@ def extract_audio_wav_16k_mono(video_path: str) -> str:
|
|
59 |
|
60 |
|
61 |
def safe_trim(text: str, max_chars: int) -> str:
|
|
|
|
|
62 |
if len(text) <= max_chars:
|
63 |
return text
|
64 |
return text[:max_chars].rsplit(" ", 1)[0] + "..."
|
65 |
|
66 |
|
67 |
def summarize_and_title(transcript: str) -> Tuple[str, str]:
|
68 |
-
|
69 |
-
|
70 |
-
trimmed = safe_trim(transcript.strip(), 4000)
|
71 |
|
72 |
-
# Summary
|
73 |
summary = summarizer(
|
74 |
trimmed,
|
75 |
max_length=140,
|
76 |
min_length=40,
|
77 |
-
do_sample=False
|
78 |
)[0]["summary_text"].strip()
|
79 |
|
80 |
-
# Title
|
81 |
title_prompt = (
|
82 |
"Generate a short, catchy, human-friendly title (<= 10 words) for this content:\n"
|
83 |
+ summary
|
84 |
)
|
85 |
title = title_gen(title_prompt, max_new_tokens=16, num_return_sequences=1)[0]["generated_text"].strip()
|
86 |
|
87 |
-
#
|
88 |
title = title.replace('"', "").replace("\n", " ").strip()
|
89 |
title = safe_trim(title, 80)
|
|
|
90 |
return title, summary
|
91 |
|
92 |
|
93 |
-
#
|
94 |
@app.post("/process_video")
|
95 |
async def process_video(file: UploadFile = File(...)):
|
96 |
-
"""
|
|
|
|
|
|
|
97 |
tmp_video = None
|
98 |
tmp_audio = None
|
99 |
try:
|
100 |
-
# Save
|
101 |
suffix = os.path.splitext(file.filename or "")[1] or ".mp4"
|
102 |
with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tv:
|
103 |
tmp_video = tv.name
|
104 |
tv.write(await file.read())
|
105 |
|
106 |
-
# Extract audio
|
107 |
tmp_audio = extract_audio_wav_16k_mono(tmp_video)
|
108 |
|
109 |
-
# Transcribe (English
|
110 |
segments, _info = whisper_model.transcribe(tmp_audio)
|
111 |
transcript = " ".join(seg.text for seg in segments).strip()
|
112 |
|
113 |
if not transcript:
|
114 |
return JSONResponse({"error": "No speech detected in the video."}, status_code=400)
|
115 |
|
116 |
-
# Summarize + Title
|
117 |
title, summary = summarize_and_title(transcript)
|
118 |
|
119 |
return JSONResponse({"title": title, "summary": summary})
|
@@ -123,7 +129,7 @@ async def process_video(file: UploadFile = File(...)):
|
|
123 |
except Exception as e:
|
124 |
return JSONResponse({"error": str(e)}, status_code=500)
|
125 |
finally:
|
126 |
-
# Cleanup
|
127 |
for p in (tmp_audio, tmp_video):
|
128 |
if p and os.path.exists(p):
|
129 |
try:
|
@@ -134,4 +140,8 @@ async def process_video(file: UploadFile = File(...)):
|
|
134 |
|
135 |
@app.get("/")
|
136 |
def root():
|
137 |
-
return {
|
|
|
|
|
|
|
|
|
|
7 |
from fastapi.middleware.cors import CORSMiddleware
|
8 |
from fastapi.responses import JSONResponse
|
9 |
|
10 |
+
# --- Put caches in writable paths for Spaces BEFORE any HF imports ---
|
11 |
+
os.environ.setdefault("HF_HOME", "/tmp/huggingface")
|
12 |
+
os.environ.setdefault("XDG_CACHE_HOME", "/tmp")
|
13 |
|
14 |
from faster_whisper import WhisperModel
|
15 |
from transformers import pipeline
|
16 |
|
17 |
+
app = FastAPI(title="Video → Title & Summary (Open Source)")
|
18 |
|
19 |
+
# CORS so your React app can call this API from anywhere
|
20 |
app.add_middleware(
|
21 |
CORSMiddleware,
|
22 |
+
allow_origins=["*"], # tighten to your domain in production
|
23 |
allow_credentials=True,
|
24 |
allow_methods=["*"],
|
25 |
allow_headers=["*"],
|
26 |
)
|
27 |
|
28 |
+
# --------- Model loading (once) ---------
|
29 |
print("Loading models...")
|
30 |
+
# Whisper: choose tiny | base | small ; small = better accuracy, slower
|
31 |
WHISPER_SIZE = os.getenv("WHISPER_SIZE", "small")
|
32 |
+
# CPU-friendly: int8 compute; uses ~1–2 GB RAM for "small"
|
33 |
+
whisper_model = WhisperModel(WHISPER_SIZE, device="cpu", compute_type="int8")
|
34 |
|
35 |
+
# Summarizer: compact & solid
|
36 |
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
|
37 |
|
38 |
+
# Title generator (tiny T5). You can switch to flan-t5-base if you upgrade hardware.
|
39 |
title_gen = pipeline("text2text-generation", model="google/flan-t5-small")
|
40 |
print("Models loaded.")
|
41 |
|
42 |
|
43 |
+
# --------- Helpers ---------
|
44 |
def extract_audio_wav_16k_mono(video_path: str) -> str:
|
45 |
+
"""Extract 16kHz mono WAV from a video file using system ffmpeg."""
|
46 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
|
47 |
audio_path = tmp.name
|
48 |
cmd = [
|
|
|
60 |
|
61 |
|
62 |
def safe_trim(text: str, max_chars: int) -> str:
|
63 |
+
"""Trim at word boundary to keep inputs within model limits."""
|
64 |
+
text = (text or "").strip()
|
65 |
if len(text) <= max_chars:
|
66 |
return text
|
67 |
return text[:max_chars].rsplit(" ", 1)[0] + "..."
|
68 |
|
69 |
|
70 |
def summarize_and_title(transcript: str) -> Tuple[str, str]:
|
71 |
+
"""Return (title, summary) from transcript using open-source models."""
|
72 |
+
trimmed = safe_trim(transcript, 4000) # rough guard for token limits
|
|
|
73 |
|
74 |
+
# Summary
|
75 |
summary = summarizer(
|
76 |
trimmed,
|
77 |
max_length=140,
|
78 |
min_length=40,
|
79 |
+
do_sample=False,
|
80 |
)[0]["summary_text"].strip()
|
81 |
|
82 |
+
# Title
|
83 |
title_prompt = (
|
84 |
"Generate a short, catchy, human-friendly title (<= 10 words) for this content:\n"
|
85 |
+ summary
|
86 |
)
|
87 |
title = title_gen(title_prompt, max_new_tokens=16, num_return_sequences=1)[0]["generated_text"].strip()
|
88 |
|
89 |
+
# Tidying
|
90 |
title = title.replace('"', "").replace("\n", " ").strip()
|
91 |
title = safe_trim(title, 80)
|
92 |
+
|
93 |
return title, summary
|
94 |
|
95 |
|
96 |
+
# --------- API ---------
|
97 |
@app.post("/process_video")
|
98 |
async def process_video(file: UploadFile = File(...)):
|
99 |
+
"""
|
100 |
+
Accepts a video under form field 'file'.
|
101 |
+
Returns JSON: { "title": str, "summary": str }
|
102 |
+
"""
|
103 |
tmp_video = None
|
104 |
tmp_audio = None
|
105 |
try:
|
106 |
+
# Save uploaded video
|
107 |
suffix = os.path.splitext(file.filename or "")[1] or ".mp4"
|
108 |
with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tv:
|
109 |
tmp_video = tv.name
|
110 |
tv.write(await file.read())
|
111 |
|
112 |
+
# Extract audio with ffmpeg
|
113 |
tmp_audio = extract_audio_wav_16k_mono(tmp_video)
|
114 |
|
115 |
+
# Transcribe (auto language). You can force English via language="en"
|
116 |
segments, _info = whisper_model.transcribe(tmp_audio)
|
117 |
transcript = " ".join(seg.text for seg in segments).strip()
|
118 |
|
119 |
if not transcript:
|
120 |
return JSONResponse({"error": "No speech detected in the video."}, status_code=400)
|
121 |
|
122 |
+
# Summarize + Title
|
123 |
title, summary = summarize_and_title(transcript)
|
124 |
|
125 |
return JSONResponse({"title": title, "summary": summary})
|
|
|
129 |
except Exception as e:
|
130 |
return JSONResponse({"error": str(e)}, status_code=500)
|
131 |
finally:
|
132 |
+
# Cleanup
|
133 |
for p in (tmp_audio, tmp_video):
|
134 |
if p and os.path.exists(p):
|
135 |
try:
|
|
|
140 |
|
141 |
@app.get("/")
|
142 |
def root():
|
143 |
+
return {
|
144 |
+
"ok": True,
|
145 |
+
"message": "POST a video to /process_video with form field 'file'.",
|
146 |
+
"docs": "/docs"
|
147 |
+
}
|