Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import FastAPI, HTTPException, Query
|
2 |
+
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
|
3 |
+
from urllib.parse import urlparse, parse_qs
|
4 |
+
|
5 |
+
app = FastAPI()
|
6 |
+
|
7 |
+
def extract_video_id(youtube_url: str) -> str:
    """Return the video ID contained in a YouTube URL.

    Supported URL shapes:

    * any URL carrying a non-empty ``v`` query parameter, e.g.
      ``https://www.youtube.com/watch?v=<id>``;
    * short links: ``https://youtu.be/<id>``;
    * path-based links on a YouTube host: ``/embed/<id>``,
      ``/shorts/<id>``, ``/live/<id>`` and ``/v/<id>``.

    Args:
        youtube_url: Full URL of the video.

    Returns:
        The video ID as a string.

    Raises:
        HTTPException: With status 400 when the URL cannot be parsed or
            no video ID can be found in it.
    """
    youtube_domains = ("youtube.com", "youtube-nocookie.com")
    id_path_prefixes = ("embed", "shorts", "live", "v")

    try:
        parsed_url = urlparse(youtube_url)
        # parse_qs drops blank values, so "?v=" yields no ID here.
        video_id = parse_qs(parsed_url.query).get("v", [None])[0]

        if not video_id:
            # No "v" parameter: fall back to IDs carried in the URL path.
            host = (parsed_url.hostname or "").lower()
            segments = [part for part in parsed_url.path.split("/") if part]
            on_youtube = any(
                host == domain or host.endswith("." + domain)
                for domain in youtube_domains
            )
            if host == "youtu.be":
                video_id = segments[0] if segments else None
            elif (
                on_youtube
                and len(segments) >= 2
                and segments[0] in id_path_prefixes
            ):
                video_id = segments[1]
    except (ValueError, TypeError, AttributeError) as exc:
        # urlparse raises ValueError on malformed netlocs; non-str input
        # surfaces as TypeError/AttributeError. All are client errors.
        raise HTTPException(
            status_code=400, detail="Invalid YouTube URL format."
        ) from exc

    if not video_id:
        raise HTTPException(status_code=400, detail="Invalid YouTube URL format.")
    return video_id
|
16 |
+
|
17 |
+
@app.get("/transcript")
def get_transcript(youtube_url: str = Query(..., description="Full YouTube video URL")):
    """Return the transcript of a YouTube video as one space-joined string.

    The video ID is resolved with ``extract_video_id`` (which raises a
    400 ``HTTPException`` for URLs it cannot handle), then the transcript
    is fetched through ``YouTubeTranscriptApi.get_transcript``.

    Args:
        youtube_url: Required query parameter holding the video URL.

    Returns:
        A dict with the keys ``"video_id"`` and ``"transcript"``.

    Raises:
        HTTPException: 404 when transcripts are disabled or none is
            found; 500 carrying the error text for any other failure.
    """
    video_id = extract_video_id(youtube_url)

    try:
        segments = YouTubeTranscriptApi.get_transcript(video_id)
        # Keep only the text of each entry and join with single spaces.
        joined_text = " ".join(segment["text"] for segment in segments)
        payload = {"video_id": video_id, "transcript": joined_text}
        return payload
    except TranscriptsDisabled:
        raise HTTPException(
            status_code=404,
            detail="Transcripts are disabled for this video.",
        )
    except NoTranscriptFound:
        raise HTTPException(
            status_code=404,
            detail="No transcript found for this video.",
        )
    except Exception as exc:
        # Any other failure is reported as a server error with its message.
        raise HTTPException(status_code=500, detail=str(exc))
|