AryanSingh04 committed on
Commit
dc941d3
·
verified ·
1 Parent(s): 4856cf3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -28
app.py CHANGED
@@ -7,41 +7,42 @@ from fastapi import FastAPI, File, UploadFile
7
  from fastapi.middleware.cors import CORSMiddleware
8
  from fastapi.responses import JSONResponse
9
 
10
- # Redirect caches to writable dirs on Spaces (pre-import!)
11
- os.environ["HF_HOME"] = "/tmp/huggingface"
12
- os.environ["XDG_CACHE_HOME"] = "/tmp"
13
 
14
  from faster_whisper import WhisperModel
15
  from transformers import pipeline
16
 
17
- app = FastAPI(title="Video → Title & Summary (OSS)")
18
 
19
- # CORS so your frontend can call the API from anywhere
20
  app.add_middleware(
21
  CORSMiddleware,
22
- allow_origins=["*"], # tighten if you have a fixed domain
23
  allow_credentials=True,
24
  allow_methods=["*"],
25
  allow_headers=["*"],
26
  )
27
 
28
- # ---------- Model loading (once) ----------
29
  print("Loading models...")
30
- # Whisper: choose "tiny", "base", or "small". "small" = better accuracy, slower.
31
  WHISPER_SIZE = os.getenv("WHISPER_SIZE", "small")
32
- whisper_model = WhisperModel(WHISPER_SIZE, device="cpu", compute_type="int8") # CPU-friendly
 
33
 
34
- # Summarizer: lightweight & reliable
35
  summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
36
 
37
- # Title generator (tiny T5). You can upgrade to flan-t5-base if you have more RAM.
38
  title_gen = pipeline("text2text-generation", model="google/flan-t5-small")
39
  print("Models loaded.")
40
 
41
 
42
- # ---------- Helpers ----------
43
  def extract_audio_wav_16k_mono(video_path: str) -> str:
44
- """Extract 16kHz mono WAV from a video file using ffmpeg."""
45
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
46
  audio_path = tmp.name
47
  cmd = [
@@ -59,61 +60,66 @@ def extract_audio_wav_16k_mono(video_path: str) -> str:
59
 
60
 
61
  def safe_trim(text: str, max_chars: int) -> str:
 
 
62
  if len(text) <= max_chars:
63
  return text
64
  return text[:max_chars].rsplit(" ", 1)[0] + "..."
65
 
66
 
67
  def summarize_and_title(transcript: str) -> Tuple[str, str]:
68
- # Keep inputs modest for small models
69
- # (BART CNN handles ~1024 tokens; use char trim for simplicity)
70
- trimmed = safe_trim(transcript.strip(), 4000)
71
 
72
- # Summary (adjust lengths as you like)
73
  summary = summarizer(
74
  trimmed,
75
  max_length=140,
76
  min_length=40,
77
- do_sample=False
78
  )[0]["summary_text"].strip()
79
 
80
- # Title (short & catchy)
81
  title_prompt = (
82
  "Generate a short, catchy, human-friendly title (<= 10 words) for this content:\n"
83
  + summary
84
  )
85
  title = title_gen(title_prompt, max_new_tokens=16, num_return_sequences=1)[0]["generated_text"].strip()
86
 
87
- # Post-tidy
88
  title = title.replace('"', "").replace("\n", " ").strip()
89
  title = safe_trim(title, 80)
 
90
  return title, summary
91
 
92
 
93
- # ---------- API ----------
94
  @app.post("/process_video")
95
  async def process_video(file: UploadFile = File(...)):
96
- """Accepts a video file under `file`, returns title+summary JSON."""
 
 
 
97
  tmp_video = None
98
  tmp_audio = None
99
  try:
100
- # Save upload
101
  suffix = os.path.splitext(file.filename or "")[1] or ".mp4"
102
  with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tv:
103
  tmp_video = tv.name
104
  tv.write(await file.read())
105
 
106
- # Extract audio
107
  tmp_audio = extract_audio_wav_16k_mono(tmp_video)
108
 
109
- # Transcribe (English auto-detect; set language="en" if needed)
110
  segments, _info = whisper_model.transcribe(tmp_audio)
111
  transcript = " ".join(seg.text for seg in segments).strip()
112
 
113
  if not transcript:
114
  return JSONResponse({"error": "No speech detected in the video."}, status_code=400)
115
 
116
- # Summarize + Title (all open-source, local)
117
  title, summary = summarize_and_title(transcript)
118
 
119
  return JSONResponse({"title": title, "summary": summary})
@@ -123,7 +129,7 @@ async def process_video(file: UploadFile = File(...)):
123
  except Exception as e:
124
  return JSONResponse({"error": str(e)}, status_code=500)
125
  finally:
126
- # Cleanup temp files
127
  for p in (tmp_audio, tmp_video):
128
  if p and os.path.exists(p):
129
  try:
@@ -134,4 +140,8 @@ async def process_video(file: UploadFile = File(...)):
134
 
135
  @app.get("/")
136
  def root():
137
- return {"ok": True, "message": "POST a video to /process_video with form field 'file'."}
 
 
 
 
 
7
  from fastapi.middleware.cors import CORSMiddleware
8
  from fastapi.responses import JSONResponse
9
 
10
+ # --- Put caches in writable paths for Spaces BEFORE any HF imports ---
11
+ os.environ.setdefault("HF_HOME", "/tmp/huggingface")
12
+ os.environ.setdefault("XDG_CACHE_HOME", "/tmp")
13
 
14
  from faster_whisper import WhisperModel
15
  from transformers import pipeline
16
 
17
+ app = FastAPI(title="Video → Title & Summary (Open Source)")
18
 
19
+ # CORS so your React app can call this API from anywhere
20
  app.add_middleware(
21
  CORSMiddleware,
22
+ allow_origins=["*"], # tighten to your domain in production
23
  allow_credentials=True,
24
  allow_methods=["*"],
25
  allow_headers=["*"],
26
  )
27
 
28
+ # --------- Model loading (once) ---------
29
  print("Loading models...")
30
+ # Whisper: choose tiny | base | small ; small = better accuracy, slower
31
  WHISPER_SIZE = os.getenv("WHISPER_SIZE", "small")
32
+ # CPU-friendly: int8 compute; uses ~1–2 GB RAM for "small"
33
+ whisper_model = WhisperModel(WHISPER_SIZE, device="cpu", compute_type="int8")
34
 
35
+ # Summarizer: compact & solid
36
  summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
37
 
38
+ # Title generator (tiny T5). You can switch to flan-t5-base if you upgrade hardware.
39
  title_gen = pipeline("text2text-generation", model="google/flan-t5-small")
40
  print("Models loaded.")
41
 
42
 
43
+ # --------- Helpers ---------
44
  def extract_audio_wav_16k_mono(video_path: str) -> str:
45
+ """Extract 16kHz mono WAV from a video file using system ffmpeg."""
46
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
47
  audio_path = tmp.name
48
  cmd = [
 
60
 
61
 
62
  def safe_trim(text: str, max_chars: int) -> str:
63
+ """Trim at word boundary to keep inputs within model limits."""
64
+ text = (text or "").strip()
65
  if len(text) <= max_chars:
66
  return text
67
  return text[:max_chars].rsplit(" ", 1)[0] + "..."
68
 
69
 
70
  def summarize_and_title(transcript: str) -> Tuple[str, str]:
71
+ """Return (title, summary) from transcript using open-source models."""
72
+ trimmed = safe_trim(transcript, 4000) # rough guard for token limits
 
73
 
74
+ # Summary
75
  summary = summarizer(
76
  trimmed,
77
  max_length=140,
78
  min_length=40,
79
+ do_sample=False,
80
  )[0]["summary_text"].strip()
81
 
82
+ # Title
83
  title_prompt = (
84
  "Generate a short, catchy, human-friendly title (<= 10 words) for this content:\n"
85
  + summary
86
  )
87
  title = title_gen(title_prompt, max_new_tokens=16, num_return_sequences=1)[0]["generated_text"].strip()
88
 
89
+ # Tidying
90
  title = title.replace('"', "").replace("\n", " ").strip()
91
  title = safe_trim(title, 80)
92
+
93
  return title, summary
94
 
95
 
96
+ # --------- API ---------
97
  @app.post("/process_video")
98
  async def process_video(file: UploadFile = File(...)):
99
+ """
100
+ Accepts a video under form field 'file'.
101
+ Returns JSON: { "title": str, "summary": str }
102
+ """
103
  tmp_video = None
104
  tmp_audio = None
105
  try:
106
+ # Save uploaded video
107
  suffix = os.path.splitext(file.filename or "")[1] or ".mp4"
108
  with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tv:
109
  tmp_video = tv.name
110
  tv.write(await file.read())
111
 
112
+ # Extract audio with ffmpeg
113
  tmp_audio = extract_audio_wav_16k_mono(tmp_video)
114
 
115
+ # Transcribe (auto language). You can force English via language="en"
116
  segments, _info = whisper_model.transcribe(tmp_audio)
117
  transcript = " ".join(seg.text for seg in segments).strip()
118
 
119
  if not transcript:
120
  return JSONResponse({"error": "No speech detected in the video."}, status_code=400)
121
 
122
+ # Summarize + Title
123
  title, summary = summarize_and_title(transcript)
124
 
125
  return JSONResponse({"title": title, "summary": summary})
 
129
  except Exception as e:
130
  return JSONResponse({"error": str(e)}, status_code=500)
131
  finally:
132
+ # Cleanup
133
  for p in (tmp_audio, tmp_video):
134
  if p and os.path.exists(p):
135
  try:
 
140
 
141
  @app.get("/")
142
  def root():
143
+ return {
144
+ "ok": True,
145
+ "message": "POST a video to /process_video with form field 'file'.",
146
+ "docs": "/docs"
147
+ }