Spaces:

NameIsJACK
/

yt-transcript

Sleeping

NameIsJACK committed on May 30

Commit

0fc8ce4

verified ·

1 Parent(s): d608319

Create app.py

Files changed (1) hide show

app.py ADDED Viewed

+from fastapi import FastAPI, HTTPException, Query
+from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
+from urllib.parse import urlparse, parse_qs
+# Application instance; the route handler defined in this file is registered on it.
+app = FastAPI()
+def extract_video_id(youtube_url: str) -> str:
+    try:
+        parsed_url = urlparse(youtube_url)
+        video_id = parse_qs(parsed_url.query).get("v", [None])[0]
+        if not video_id:
+            raise ValueError("Invalid YouTube URL.")
+        return video_id
+    except Exception:
+        raise HTTPException(status_code=400, detail="Invalid YouTube URL format.")
+@app.get("/transcript")
+def get_transcript(youtube_url: str = Query(..., description="Full YouTube video URL")):
+    video_id = extract_video_id(youtube_url)
+    try:
+        transcript = YouTubeTranscriptApi.get_transcript(video_id)
+        text_only = " ".join([entry["text"] for entry in transcript])
+        return {
+            "video_id": video_id,
+            "transcript": text_only
+        }
+    except TranscriptsDisabled:
+        raise HTTPException(status_code=404, detail="Transcripts are disabled for this video.")
+    except NoTranscriptFound:
+        raise HTTPException(status_code=404, detail="No transcript found for this video.")
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))