lukawskikacper committed on
Commit
74cf6bd
·
1 Parent(s): 958cc77

Vibe coded implementation (with some manual fixes)

Browse files

(cherry picked from commit 255acda8c8bcb989fd72006b84dee18553468356)

.dockerignore ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Git
2
+ .git
3
+ .gitignore
4
+
5
+ # Python
6
+ __pycache__/
7
+ *.py[cod]
8
+ *$py.class
9
+ *.so
10
+ .Python
11
+ env/
12
+ build/
13
+ develop-eggs/
14
+ dist/
15
+ downloads/
16
+ eggs/
17
+ .eggs/
18
+ lib/
19
+ lib64/
20
+ parts/
21
+ sdist/
22
+ var/
23
+ *.egg-info/
24
+ .installed.cfg
25
+ *.egg
26
+
27
+ # Virtual environment
28
+ venv/
29
+ .env
30
+ .venv/
31
+ ENV/
32
+
33
+ # Docker
34
+ .dockerignore
35
+ Dockerfile
36
+ docker-compose.yml
37
+
38
+ # IDE
39
+ .idea/
40
+ .vscode/
41
+ *.swp
42
+ *.swo
43
+
44
+ # Misc
45
+ .DS_Store
46
+ .pytest_cache/
47
+ htmlcov/
48
+ .coverage
49
+ .tox/
CLAUDE.md ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Development Guidelines for Vibe Coding RAG
2
+
3
+ ## Commands
4
+ - Build/Install: `poetry install`
5
+ - Run: `poetry run python -m app.main` (once app is created)
6
+ - Lint: `poetry run ruff check .`
7
+ - Format: `poetry run ruff format .`
8
+ - Test: `poetry run pytest`
9
+ - Run single test: `poetry run pytest path/to/test.py::test_function_name -v`
10
+
11
+ ## Code Style
12
+ - **Imports**: Group standard library, third-party, and local imports
13
+ - **Formatting**: Use Black/Ruff compatible formatting
14
+ - **Types**: Use type annotations for function parameters and return values
15
+ - **Naming**:
16
+ - Variables/functions: snake_case
17
+ - Classes: PascalCase
18
+ - Constants: UPPER_SNAKE_CASE
19
+ - **Error Handling**: Use try/except with specific exceptions
20
+ - **Documentation**: Docstrings for all public functions and classes
21
+
22
+ ## Technologies
23
+ - Vector DB: Qdrant
24
+ - Embeddings: SentenceTransformers with sentence-transformers/static-retrieval-mrl-en-v1
25
+ - API: FastAPI (when implemented)
26
+ - Frontend: HTML/CSS/JavaScript with DaisyUI components
27
+
28
+ ## MCP Integration
29
+ - Always call qdrant-code-search find tool when you are about to generate frontend code (HTML/CSS/JS)
30
+ - Store generated code snippets in qdrant-code-search store tool for future reference
31
+
32
+ ## Qdrant
33
+ - Point IDs have to be string-like UUIDs
Dockerfile ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install Poetry (pinned so image builds are reproducible)
RUN pip install --no-cache-dir poetry==1.8.3

# Copy only the dependency manifests first so the dependency layer is cached
# independently of application code changes
COPY pyproject.toml poetry.lock poetry.toml* ./

# Configure poetry to install into the system interpreter (no virtualenv)
RUN poetry config virtualenvs.create false

# Install runtime dependencies only.
# --only main replaces the deprecated --no-dev flag.
# --no-root skips installing the project itself: its source has not been
# copied yet, and the app is made importable through PYTHONPATH below.
RUN poetry install --only main --no-root --no-interaction --no-ansi

# Copy application code
COPY app ./app

# Expose port
EXPOSE 8000

# Set environment variables
ENV PYTHONPATH=/app
# NOTE: inside a container "localhost" is the container itself; override
# QDRANT_URL at runtime when Qdrant runs elsewhere.
ENV QDRANT_URL=http://localhost:6333
# ENV QDRANT_API_KEY=your_api_key_here (uncomment and set if needed)

# Default worker count. This is a fixed value, not computed from the CPU count;
# override it at runtime (commonly (2 * CPU cores) + 1).
ENV WORKERS=4

# Copy the gunicorn config file
COPY gunicorn.conf.py ./

# Command to run the application with Gunicorn and Uvicorn workers
CMD ["gunicorn", "app.main:app", "-c", "gunicorn.conf.py"]
app/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Initialize app package
app/api/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Initialize API package
app/api/router.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter
2
+ from app.api import video
3
+
4
# Aggregate router: every feature router is attached here under its own prefix.
router = APIRouter()
router.include_router(router=video.router, prefix="/video", tags=["video"])
app/api/video.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, HTTPException, Query
2
+ from typing import List, Optional
3
+ from app.models.video import Video, SearchResult, VideoSegment
4
+ from app.services.video_service import (
5
+ process_video,
6
+ search_video_segments,
7
+ get_all_segments,
8
+ get_processed_videos,
9
+ get_video_by_id,
10
+ )
11
+ from pydantic import BaseModel
12
+
13
+ router = APIRouter()
14
+
15
+
16
class VideoRequest(BaseModel):
    """Request body accepted by the video processing endpoint."""

    # URL of the video to process, as submitted by the client.
    url: str
18
+
19
+
20
class VideoResponse(BaseModel):
    """Envelope returned by the processing endpoint.

    Carries the video metadata plus a flag telling the caller whether this
    request triggered processing or returned previously stored data.
    """

    # Metadata of the processed (or previously processed) video.
    video: Video
    # True only when this request actually ran the processing pipeline.
    newly_processed: bool = False
25
+
26
+
27
@router.post("/process", response_model=VideoResponse)
async def process_video_endpoint(video_request: VideoRequest) -> VideoResponse:
    """Process a YouTube video to extract and store transcript segments.

    If the video has already been processed, the stored data is returned
    without reprocessing and ``newly_processed`` is False.

    Raises:
        HTTPException: 400 when no video ID can be extracted from the URL,
            500 for any failure while looking up or processing the video.
    """
    import logging

    # extract_video_id is not part of this module's top-level imports.
    from app.services.video_service import extract_video_id

    try:
        video_id = extract_video_id(video_request.url)
    except ValueError as exc:
        # A URL we cannot parse is the client's mistake, not a server failure.
        logging.warning(f"Rejected video URL {video_request.url}: {exc}")
        raise HTTPException(status_code=400, detail=str(exc)) from exc

    try:
        # Check if already processed
        existing_video = get_video_by_id(video_id)
        if existing_video is not None and existing_video.processed:
            logging.info(f"Video {video_id} already processed, returning existing data")
            return VideoResponse(video=existing_video, newly_processed=False)

        # Process the video if needed
        result = process_video(video_request.url)
        return VideoResponse(video=result, newly_processed=True)
    except Exception as e:
        # logging.exception records the message and the traceback together.
        logging.exception(f"Error processing video URL {video_request.url}: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e)) from e
58
+
59
+
60
@router.get("/search")
async def search_video_endpoint(
    query: str = Query(..., description="Search query for video content"),
    video_id: Optional[str] = Query(
        None, description="Optional YouTube video ID to limit search"
    ),
    limit: int = Query(5, description="Maximum number of results to return"),
) -> List[SearchResult]:
    """Return the video segments that best match ``query``.

    A ``video_id`` of "undefined" or "null" (any letter case) is treated as
    absent, so the search runs across all videos instead of failing.

    Raises:
        HTTPException: 500 when the underlying search raises.
    """
    import logging

    # These literals are what a frontend sends when its variable was unset.
    placeholder_ids = ("undefined", "null")
    if video_id and video_id.lower() in placeholder_ids:
        logging.warning(f"Invalid video_id in search request: '{video_id}'")
        video_id = None

    try:
        return search_video_segments(query, video_id, limit)
    except Exception as e:
        logging.error(
            f"Error searching for query '{query}' with video_id '{video_id}': {str(e)}"
        )
        raise HTTPException(status_code=500, detail=str(e))
84
+
85
+
86
@router.get("/segments/{video_id}")
async def get_segments_endpoint(video_id: str) -> List[VideoSegment]:
    """Return every stored segment of one video.

    Invalid IDs (empty, "undefined", "null") and videos without segments both
    yield an empty list rather than an error, so the frontend can render an
    empty state.

    Raises:
        HTTPException: 500 when the segment lookup raises.
    """
    import logging

    looks_invalid = not video_id or video_id.lower() in ("undefined", "null")
    if looks_invalid:
        logging.warning(f"Invalid video ID requested: '{video_id}'")
        return []

    try:
        found = get_all_segments(video_id)
    except Exception as e:
        logging.error(f"Error getting segments for video {video_id}: {str(e)}")
        raise HTTPException(
            status_code=500, detail=f"Could not retrieve video segments: {str(e)}"
        )

    # Normalise "nothing found" to an empty list instead of a 404.
    return found if found else []
108
+
109
+
110
@router.get("/recent")
async def get_recent_videos_endpoint(
    limit: int = Query(10, description="Maximum number of videos to return"),
) -> List[Video]:
    """Return up to ``limit`` processed videos from the video service.

    Raises:
        HTTPException: 500 when the lookup raises.
    """
    try:
        return get_processed_videos(limit=limit)
    except Exception as e:
        import logging

        logging.error(f"Error getting recent videos: {str(e)}")
        raise HTTPException(
            status_code=500, detail=f"Could not retrieve recent videos: {str(e)}"
        )
126
+
127
+
128
@router.get("/info/{video_id}")
async def get_video_info_endpoint(video_id: str) -> Video:
    """Return metadata for one video.

    When the video is unknown, a placeholder ``Video`` carrying only the ID
    and a generic title is returned instead of an error.

    Raises:
        HTTPException: 500 when the lookup raises.
    """
    try:
        stored = get_video_by_id(video_id)
        # Fall back to a minimal placeholder when nothing is stored.
        return stored or Video(video_id=video_id, title=f"Video {video_id}")
    except Exception as e:
        import logging

        logging.error(f"Error getting video info for {video_id}: {str(e)}")
        raise HTTPException(
            status_code=500, detail=f"Could not retrieve video info: {str(e)}"
        )
app/main.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, Request
2
+ from fastapi.staticfiles import StaticFiles
3
+ from fastapi.templating import Jinja2Templates
4
+ from fastapi.responses import HTMLResponse, RedirectResponse
5
+ from fastapi.middleware.cors import CORSMiddleware
6
+ from app.api import router as api_router
7
+ from app.services.video_service import get_video_by_id
8
+
9
# Interactive API docs and the OpenAPI schema are disabled for this app.
app = FastAPI(title="In-Video Search", docs_url=None, redoc_url=None, openapi_url=None)

# Enable CORS.
# Credentials stay disabled while origins are wildcarded: FastAPI's CORS docs
# do not allow a "*" origin together with allow_credentials=True. List explicit
# origins here before turning credentials on.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Adjust this in production
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Mount static files
app.mount("/static", StaticFiles(directory="app/static"), name="static")

# Templates
templates = Jinja2Templates(directory="app/templates")
25
+
26
+
27
@app.get("/", response_class=HTMLResponse)
async def index(request: Request):
    """Render the landing page from the ``index.html`` template."""
    context = {"request": request, "title": "In-Video Search"}
    return templates.TemplateResponse("index.html", context)
32
+
33
+
34
@app.get("/video/{video_id}", response_class=HTMLResponse)
async def video_page(request: Request, video_id: str):
    """Render the player page for one video.

    The page title is the stored video title when one exists, otherwise the
    generic "Video Player".
    """
    stored = get_video_by_id(video_id)
    page_title = stored.title if stored and stored.title else "Video Player"

    context = {"request": request, "title": page_title, "video_id": video_id}
    return templates.TemplateResponse("video.html", context)
48
+
49
+
50
@app.get("/watch")
async def watch_redirect(request: Request, v: str):
    """Redirect YouTube-style ``/watch?v=<id>`` URLs to the video page."""
    destination = f"/video/{v}"
    return RedirectResponse(url=destination)
54
+
55
+
56
# Expose every API route under the /api prefix.
app.include_router(api_router.router, prefix="/api")


if __name__ == "__main__":
    # Development entry point: serve with auto-reload on all interfaces.
    import uvicorn

    uvicorn.run(app="app.main:app", host="0.0.0.0", port=8000, reload=True)
app/models/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Initialize models package
app/models/video.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field
2
+ from typing import Optional
3
+
4
+
5
class VideoSegment(BaseModel):
    """A slice of a video's transcript together with its time range."""

    # All fields are required: Field() without a default marks them as such.
    text: str = Field(description="Transcript text of the segment")
    start: float = Field(description="Start time in seconds")
    end: float = Field(description="End time in seconds")
    segment_id: str = Field(description="Unique identifier for the segment")
    video_id: str = Field(description="YouTube video ID this segment belongs to")
13
+
14
+
15
class Video(BaseModel):
    """Metadata describing a YouTube video and its processing state."""

    # Only video_id is required; every other field has a default.
    video_id: str = Field(description="YouTube video ID")
    title: Optional[str] = Field(default=None, description="Video title")
    description: Optional[str] = Field(default=None, description="Video description")
    channel: Optional[str] = Field(default=None, description="Channel name")
    processed: bool = Field(
        default=False, description="Whether the video has been processed"
    )
    created_at: Optional[int] = Field(
        default=None,
        description="Unix timestamp (seconds since epoch) when the video was processed",
    )
26
+
27
+
28
class SearchResult(BaseModel):
    """One search hit: a matching segment and its similarity score."""

    # Both fields are required.
    score: float = Field(description="Similarity score")
    segment: VideoSegment = Field(description="The matching video segment")
app/services/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Initialize services package
app/services/qdrant_service.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from qdrant_client import QdrantClient
3
+ import logging
4
+
5
+
6
def get_qdrant_client() -> QdrantClient:
    """
    Build a Qdrant client from environment configuration.

    Environment variables:
    - QDRANT_URL: URL for Qdrant server (default: http://localhost:6333)
    - QDRANT_API_KEY: Optional API key for authentication

    A connectivity probe is run after construction. A failed probe is only
    logged; the client is returned either way.

    Returns:
        QdrantClient: Configured Qdrant client
    """
    url = os.getenv("QDRANT_URL", "http://localhost:6333")
    api_key = os.getenv("QDRANT_API_KEY")

    # Only pass api_key when one is configured.
    client_kwargs = {"location": url}
    if api_key:
        client_kwargs["api_key"] = api_key
    client = QdrantClient(**client_kwargs)

    logging.info(
        f"Connecting to Qdrant at {url} with API key"
        if api_key
        else f"Connecting to Qdrant at {url}"
    )

    # Probe the connection, but never fail construction because of it.
    try:
        client.get_collections()
    except Exception as e:
        logging.error(f"Failed to connect to Qdrant at {url}: {e}")
    else:
        logging.info(f"Successfully connected to Qdrant at {url}")

    return client
38
+
39
+
40
# Shared client instance, created once when this module is first imported.
qdrant_client: QdrantClient = get_qdrant_client()
app/services/video_service.py ADDED
@@ -0,0 +1,647 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import uuid
2
+ from typing import List, Dict, Any, Optional
3
+ import re
4
+ from datetime import datetime
5
+ from sentence_transformers import SentenceTransformer
6
+ from qdrant_client.http import models
7
+ from youtube_transcript_api import YouTubeTranscriptApi
8
+ import yt_dlp
9
+ from app.models.video import VideoSegment, Video, SearchResult
10
+ from app.services.qdrant_service import qdrant_client
11
+
12
# Embedding model, loaded once at import time and shared by every function below.
model = SentenceTransformer("sentence-transformers/static-retrieval-mrl-en-v1")

# Qdrant collection holding the transcript segments.
COLLECTION_NAME = "video_segments"
# Qdrant collection holding one record per processed video.
PROCESSED_VIDEOS_COLLECTION = "processed_videos"
18
+
19
+
20
def _fetch_youtube_metadata(video_id: str, video: Optional[Video] = None) -> Video:
    """Fill in title, description and channel for a video using yt-dlp.

    Args:
        video_id: YouTube video ID to look up.
        video: Existing ``Video`` to update in place; a new one is created
            when omitted.

    Returns:
        The updated ``Video``. Lookup failures are logged, not raised; in that
        case a missing title falls back to ``"Video <video_id>"``.
    """
    import logging

    video = video if video else Video(video_id=video_id)

    # Metadata only: nothing is downloaded and yt-dlp output is silenced.
    ydl_opts = {
        "skip_download": True,
        "quiet": True,
        "no_warnings": True,
        "extract_flat": True,
        "format": "best",
    }
    # yt-dlp info key -> Video attribute it populates
    field_map = (
        ("title", "title"),
        ("description", "description"),
        ("uploader", "channel"),
    )

    try:
        logging.info(f"Fetching metadata for video {video_id} from YouTube")

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(
                f"https://www.youtube.com/watch?v={video_id}", download=False
            )

            # Copy over only the values that are present and non-empty.
            for info_key, attribute in field_map:
                value = info.get(info_key)
                if value:
                    setattr(video, attribute, value)

        logging.info(
            f"Successfully retrieved video metadata: title='{video.title}', channel='{video.channel}'"
        )
    except Exception as meta_error:
        logging.warning(f"Could not fetch metadata from YouTube: {str(meta_error)}")
        if not video.title:
            video.title = f"Video {video_id}"

    return video
64
+
65
+
66
+ # Ensure collections exist
67
def ensure_collection_exists():
    """Create the segment and processed-video collections when they are missing.

    Both collections use cosine distance and the embedding model's vector
    size. The processed-videos collection additionally gets payload indexes
    on ``video_id`` (keyword) and ``created_at`` (integer with range support).

    Raises:
        Exception: any error from Qdrant is logged with its traceback and
            re-raised unchanged.
    """
    import logging

    def _cosine_params(size):
        # Vector configuration shared by both collections.
        return models.VectorParams(size=size, distance=models.Distance.COSINE)

    try:
        logging.info("Checking Qdrant collections")
        collection_names = [
            existing.name for existing in qdrant_client.get_collections().collections
        ]
        logging.info(f"Existing collections: {collection_names}")

        # Transcript segments collection
        if COLLECTION_NAME not in collection_names:
            logging.info(f"Creating collection: {COLLECTION_NAME}")
            vector_size = model.get_sentence_embedding_dimension()
            qdrant_client.create_collection(
                collection_name=COLLECTION_NAME,
                vectors_config=_cosine_params(vector_size),
            )
            logging.info(
                f"Successfully created {COLLECTION_NAME} collection with vector size {vector_size}"
            )

        # Processed videos collection, plus the indexes used to look videos up
        if PROCESSED_VIDEOS_COLLECTION not in collection_names:
            logging.info(f"Creating collection: {PROCESSED_VIDEOS_COLLECTION}")
            vector_size = model.get_sentence_embedding_dimension()
            qdrant_client.create_collection(
                collection_name=PROCESSED_VIDEOS_COLLECTION,
                vectors_config=_cosine_params(vector_size),
            )
            qdrant_client.create_payload_index(
                collection_name=PROCESSED_VIDEOS_COLLECTION,
                field_name="video_id",
                field_schema=models.PayloadSchemaType.KEYWORD,
            )
            created_at_schema = models.IntegerIndexParams(
                type=models.IntegerIndexType.INTEGER,
                range=True,
            )
            qdrant_client.create_payload_index(
                collection_name=PROCESSED_VIDEOS_COLLECTION,
                field_name="created_at",
                field_schema=created_at_schema,
            )
            logging.info(
                f"Successfully created {PROCESSED_VIDEOS_COLLECTION} collection with vector size {vector_size}"
            )
    except Exception as e:
        import traceback

        logging.error(f"Error ensuring collections exist: {str(e)}")
        logging.error(traceback.format_exc())
        raise
125
+
126
+
127
def get_embeddings(text: str) -> List[float]:
    """Encode ``text`` with the module-level model and return it as a list."""
    embedding = model.encode(text)
    return embedding.tolist()
130
+
131
+
132
def extract_video_id(youtube_url: str) -> str:
    """Extract the YouTube video ID from a URL or a bare ID.

    Recognised forms:
      - ``youtube.com/watch?v=ID`` (``v`` may appear anywhere in the query)
      - ``youtu.be/ID``
      - ``youtube.com/embed/ID`` and ``youtube.com/v/ID``
      - ``youtube.com/shorts/ID`` and ``youtube.com/live/ID``
      - a bare ID made only of word characters and hyphens

    Surrounding whitespace is ignored.

    Args:
        youtube_url: URL or bare video ID.

    Returns:
        The extracted video ID.

    Raises:
        ValueError: if no video ID can be extracted.
    """
    import logging

    logging.info(f"Extracting video ID from URL: {youtube_url}")

    # Pasted URLs often carry stray spaces or a trailing newline.
    candidate = youtube_url.strip()

    # Order matters: the first three patterns are the original ones and keep
    # precedence, so every URL that matched before yields the same ID.
    patterns = (
        r"(?:youtube\.com/watch\?v=|youtu\.be/)([\w-]+)",
        r"(?:youtube\.com/embed/)([\w-]+)",
        r"(?:youtube\.com/v/)([\w-]+)",
        # `v` is not always the first query parameter (e.g. ?feature=share&v=ID)
        r"youtube\.com/watch\?[^#\s]*?[&;]v=([\w-]+)",
        r"youtube\.com/(?:shorts|live)/([\w-]+)",
    )

    for pattern in patterns:
        match = re.search(pattern, candidate)
        if match:
            video_id = match.group(1)
            logging.info(f"Extracted video ID: {video_id}")
            return video_id

    # If no pattern matches, assume the input might be a direct video ID
    if re.fullmatch(r"[\w-]+", candidate):
        logging.info(f"Using direct video ID: {candidate}")
        return candidate

    logging.error(f"Failed to extract video ID from URL: {youtube_url}")
    raise ValueError(f"Could not extract video ID from URL: {youtube_url}")
159
+
160
+
161
def get_video_transcript(video_id: str) -> List[Dict[str, Any]]:
    """
    Get transcript for a YouTube video in any available language.

    Transcripts are tried in this priority:
    1. English transcripts ("en" first, then regional variants such as "en-US")
    2. Any other transcript translated to English (if translatable)
    3. Any other transcript in its original language

    Args:
        video_id: YouTube video ID.

    Returns:
        The fetched transcript, exactly as returned by the transcript library.

    Raises:
        ValueError: if the transcript listing fails or no transcript could be
            fetched; the underlying error is chained as ``__cause__``.
    """
    import logging
    import traceback

    def _is_english(code: str) -> bool:
        return code == "en" or code.startswith("en-")

    try:
        # Materialise once: the listing is iterated several times below.
        available = list(YouTubeTranscriptApi.list_transcripts(video_id))

        # Keep EVERY English transcript in listing order, so an earlier one is
        # not overwritten by a later one and each can serve as a fallback.
        # NOTE(review): the library is understood to list manually created
        # transcripts before auto-generated ones - confirm against its docs.
        english_transcripts = [t for t in available if _is_english(t.language_code)]
        # Stable sort: exact "en" ahead of regional variants.
        english_transcripts.sort(key=lambda t: t.language_code != "en")
        other_transcripts = [t for t in available if not _is_english(t.language_code)]

        # 1. Try English first if available
        for transcript_item in english_transcripts:
            try:
                logging.info("Found English transcript, using it directly")
                return transcript_item.fetch()
            except Exception as e:
                logging.warning(f"Failed to fetch English transcript: {str(e)}")

        # 2. Try translatable transcripts
        for transcript_item in other_transcripts:
            if not transcript_item.is_translatable:
                continue
            try:
                logging.info(
                    f"Trying to translate {transcript_item.language_code} transcript to English"
                )
                translated = transcript_item.translate("en").fetch()
                logging.info(
                    f"Successfully translated {transcript_item.language_code} transcript to English"
                )
                return translated
            except Exception as e:
                logging.warning(
                    f"Failed to translate {transcript_item.language_code} transcript: {str(e)}"
                )

        # 3. Try any transcript in original language
        for transcript_item in other_transcripts:
            try:
                logging.info(
                    f"Using non-translated {transcript_item.language_code} transcript"
                )
                return transcript_item.fetch()
            except Exception as e:
                logging.warning(
                    f"Failed to fetch {transcript_item.language_code} transcript: {str(e)}"
                )

        # If we get here, no transcripts worked
        available_langs = [t.language_code for t in available]
        raise ValueError(
            f"No usable transcripts found for video {video_id}. Available languages: {available_langs}"
        )

    except Exception as e:
        logging.error(f"Transcript API error for video {video_id}: {str(e)}")
        logging.error(traceback.format_exc())
        # Callers get a single exception type; the original error is chained.
        raise ValueError(
            f"Could not get transcript for video {video_id}: {str(e)}"
        ) from e
234
+
235
+
236
def store_processed_video(video: Video) -> bool:
    """Store a processed video record in the processed-videos collection.

    The point's vector is an embedding of the string ``video_<video_id>`` and
    its payload is the full model dump of ``video``.

    Args:
        video: Video metadata to persist.

    Returns:
        True when the upsert succeeded, False when any error occurred. The
        error is logged with its traceback and not raised.
    """
    import logging

    try:
        # Get a simple embedding for the video ID
        vector = get_embeddings(f"video_{video.video_id}")

        # Prepare payload
        payload = video.model_dump()

        # Store in Qdrant
        qdrant_client.upsert(
            collection_name=PROCESSED_VIDEOS_COLLECTION,
            points=[
                models.PointStruct(
                    id=uuid.uuid4().hex,
                    vector=vector,
                    payload=payload,
                ),
            ],
        )
        return True
    except Exception as e:
        # Use logging (not print) so failures follow the logging configuration
        # like the rest of this module.
        logging.exception(f"Error storing processed video: {e}")
        return False
260
+
261
+
262
def get_processed_videos(limit: int = 10) -> List[Video]:
    """Get recently processed videos ordered by creation time, newest first.

    Args:
        limit: Maximum number of videos to return.

    Returns:
        Up to ``limit`` videos. An empty list is returned when the lookup
        fails; the error is logged with its traceback and not raised.
    """
    import logging

    try:
        # Ask Qdrant for the newest records directly
        points, _next_offset = qdrant_client.scroll(
            collection_name=PROCESSED_VIDEOS_COLLECTION,
            limit=limit,
            with_payload=True,
            order_by=models.OrderBy(key="created_at", direction=models.Direction.DESC),
        )

        # Convert payloads to Video models
        videos = [Video(**point.payload) for point in points]

        # Sort by created_at timestamp (most recent first). A missing timestamp
        # sorts as 0: created_at is an int, so an int fallback keeps the keys
        # comparable (a str fallback raises TypeError next to real values).
        videos.sort(key=lambda v: v.created_at or 0, reverse=True)

        return videos[:limit]
    except Exception as e:
        logging.exception(f"Error getting processed videos: {e}")
        return []
287
+
288
+
289
def _title_from_transcript(transcript) -> str:
    """Build a fallback title from the first 30 characters of the first entry."""
    first = transcript[0]
    if isinstance(first, dict) and "text" in first:
        snippet = first["text"][:30]
    elif hasattr(first, "text"):
        snippet = first.text[:30]
    else:
        # Unknown entry type: fall back to its string representation.
        snippet = str(first)[:30]
    return f"{snippet}..."


def _normalize_transcript(transcript) -> List[Dict[str, Any]]:
    """Convert transcript entries (dicts or attribute objects) into plain dicts."""
    import logging

    required = ("text", "start", "duration")
    normalized = []
    for idx, item in enumerate(transcript):
        if isinstance(item, dict) and all(key in item for key in required):
            normalized.append({key: item[key] for key in required})
        elif all(hasattr(item, key) for key in required):
            normalized.append({key: getattr(item, key) for key in required})
        else:
            logging.warning(f"Encountered unknown transcript item format: {type(item)}")
            try:
                normalized.append(
                    {
                        "text": str(item),
                        # No timing available: approximate 5 seconds per entry,
                        # using the entry's position in the transcript.
                        "start": float(idx * 5),
                        "duration": 5.0,
                    }
                )
            except Exception as e:
                logging.error(f"Failed to normalize transcript item: {str(e)}")
    return normalized


def _build_segments(
    video_id: str,
    entries: List[Dict[str, Any]],
    window_seconds: float = 30.0,
    overlap_seconds: float = 10.0,
) -> List[VideoSegment]:
    """Group normalized entries into ~window_seconds segments that overlap."""
    stride_seconds = window_seconds - overlap_seconds
    total = len(entries)
    segments = []

    # A while loop is required: the start index must jump forward by a
    # variable amount, which a `for i in range(...)` loop cannot do.
    i = 0
    while i < total:
        start_time = entries[i]["start"]
        end_time = start_time
        texts = []

        # Collect entries until the window spans at least window_seconds.
        j = i
        while j < total and end_time - start_time < window_seconds:
            texts.append(entries[j]["text"])
            end_time = entries[j]["start"] + entries[j]["duration"]
            j += 1

        segments.append(
            VideoSegment(
                text=" ".join(texts),
                start=start_time,
                end=end_time,
                segment_id=f"{video_id}_{i}",
                video_id=video_id,
            )
        )

        # Skip forward, keeping the overlap: when the next entry lies well
        # inside this window, jump to the first entry starting at least
        # stride_seconds after this segment's start.
        next_i = i + 1
        if next_i < total and entries[next_i]["start"] < end_time - overlap_seconds:
            while (
                next_i < total
                and entries[next_i]["start"] < start_time + stride_seconds
            ):
                next_i += 1
        i = next_i

    return segments


def process_video(youtube_url: str) -> Video:
    """Process a YouTube video to extract and store transcript segments.

    Steps: extract the video ID, return early if the video is already marked
    processed, fetch metadata and transcript, split the transcript into
    overlapping ~30-second segments (10-second overlap), store the segments,
    then store the video record with ``processed=True``.

    Args:
        youtube_url: YouTube URL (or bare video ID) to process.

    Returns:
        The processed ``Video`` (or the stored one when already processed).

    Raises:
        Exception: errors from ID extraction, transcript retrieval or segment
            storage are logged with their traceback and re-raised.
    """
    import logging
    import time
    import traceback

    logging.info(f"Processing video URL: {youtube_url}")
    transcript = None
    video_id = None

    # Extract video ID and get transcript
    try:
        video_id = extract_video_id(youtube_url)
        logging.info(f"Successfully extracted video ID: {video_id}")

        # Check if video has already been processed
        existing_video = get_video_by_id(video_id)
        if existing_video and existing_video.processed:
            logging.info(
                f"Video {video_id} has already been processed. Skipping processing."
            )
            return existing_video

        # time.time() is a true Unix timestamp regardless of the host timezone
        # (a naive utcnow().timestamp() is skewed on non-UTC hosts).
        video = Video(video_id=video_id, created_at=int(time.time()))

        # Metadata is best-effort: processing continues without it.
        try:
            video = _fetch_youtube_metadata(video_id, video)
        except Exception as meta_error:
            logging.warning(
                f"Error fetching YouTube metadata during processing: {str(meta_error)}"
            )

        logging.info(f"Fetching transcript for video ID: {video_id}")
        transcript = get_video_transcript(video_id)
        logging.info(
            f"Successfully retrieved transcript with {len(transcript)} entries"
        )

        # If we couldn't get a real title, derive one from the transcript
        if (
            (not video.title or video.title == f"Video {video_id}")
            and transcript
            and len(transcript) > 0
        ):
            try:
                video.title = _title_from_transcript(transcript)
                logging.info(f"Set video title from transcript: {video.title}")
            except Exception as title_error:
                logging.warning(
                    f"Could not set title from transcript: {str(title_error)}"
                )
    except Exception as e:
        logging.error(f"Error in initial video processing: {str(e)}")
        logging.error(traceback.format_exc())
        raise

    # Process transcript into segments and store them
    try:
        logging.info(f"Processing {len(transcript)} transcript entries into segments")
        normalized_transcript = _normalize_transcript(transcript)
        segments = _build_segments(video_id, normalized_transcript)
        logging.info(f"Created {len(segments)} segments from transcript")

        logging.info("Ensuring Qdrant collections exist")
        ensure_collection_exists()

        logging.info(f"Storing {len(segments)} segments in Qdrant")
        stored_count = sum(1 for segment in segments if store_segment(segment))
        if stored_count < len(segments):
            # Surface partial failures instead of ignoring the return values.
            logging.warning(
                f"Only {stored_count} of {len(segments)} segments were stored for video {video_id}"
            )
    except Exception as e:
        logging.error(f"Error processing transcript segments: {str(e)}")
        logging.error(traceback.format_exc())
        raise

    # Mark video as processed and store it
    try:
        logging.info(f"Marking video {video_id} as processed")
        video.processed = True

        logging.info("Storing processed video in Qdrant")
        if store_processed_video(video):
            logging.info(f"Successfully stored processed video: {video_id}")
        else:
            logging.warning(f"Failed to store processed video in Qdrant: {video_id}")

        return video
    except Exception as e:
        logging.error(f"Error storing processed video: {str(e)}")
        logging.error(traceback.format_exc())
        raise
492
+
493
+
494
def store_segment(segment: VideoSegment) -> bool:
    """Embed a video segment and upsert it into the segments collection.

    The Qdrant point ID is a UUIDv5 derived from ``segment.segment_id``, so
    storing the same segment again overwrites the existing point instead of
    adding a duplicate (a random ID would create a new point on every call).

    Args:
        segment: Segment to store; its ``text`` is embedded and its dumped
            fields become the point payload.

    Returns:
        True when the upsert call returned, False when any step raised (the
        error and its traceback are logged).
    """
    import logging

    try:
        # Get embeddings
        logging.debug(f"Getting embeddings for segment {segment.segment_id}")
        vector = get_embeddings(segment.text)

        # Prepare payload
        payload = segment.model_dump()

        # Deterministic ID: the same segment_id always maps to the same point,
        # which makes the upsert below idempotent.
        point_id = uuid.uuid5(uuid.NAMESPACE_URL, str(segment.segment_id)).hex
        logging.debug(
            f"Storing segment {segment.segment_id} in Qdrant with point ID {point_id}"
        )
        qdrant_client.upsert(
            collection_name=COLLECTION_NAME,
            points=[
                models.PointStruct(
                    id=point_id,
                    vector=vector,
                    payload=payload,
                ),
            ],
        )
        return True
    except Exception as e:
        # Best-effort by design: callers only get a success flag.
        import traceback

        logging.error(f"Error storing segment {segment.segment_id}: {str(e)}")
        logging.error(traceback.format_exc())
        return False
528
+
529
+
530
def search_video_segments(
    query: str, video_id: Optional[str] = None, limit: int = 5
) -> List[SearchResult]:
    """Run a vector search over stored segments for ``query``.

    Args:
        query: Free-text query; it is embedded with ``get_embeddings``.
        video_id: When truthy, only segments whose payload ``video_id``
            matches are considered.
        limit: Maximum number of hits requested from Qdrant.

    Returns:
        One ``SearchResult`` per hit, in the order Qdrant returned them.
    """
    # Embed first so an embedding failure surfaces before any Qdrant work.
    query_vector = get_embeddings(query)

    # Restrict to a single video only when an ID was supplied.
    query_filter = (
        models.Filter(
            must=[
                models.FieldCondition(
                    key="video_id",
                    match=models.MatchValue(value=video_id),
                ),
            ],
        )
        if video_id
        else None
    )

    hits = qdrant_client.search(
        collection_name=COLLECTION_NAME,
        query_vector=query_vector,
        limit=limit,
        query_filter=query_filter,
    )

    # Each payload is the dumped VideoSegment that was stored with the point.
    return [
        SearchResult(score=hit.score, segment=VideoSegment(**hit.payload))
        for hit in hits
    ]
571
+
572
+
573
def get_all_segments(video_id: str) -> List[VideoSegment]:
    """Get all segments for a specific video, ordered by start time.

    The collection is read page by page, following the next-page offset
    returned by ``scroll``, so videos with more segments than fit in one
    page are no longer silently truncated.

    Args:
        video_id: ID stored in each segment payload's ``video_id`` field.

    Returns:
        Every matching segment, sorted ascending by ``start``.
    """
    # Prepare filter for the video_id
    filter_param = models.Filter(
        must=[
            models.FieldCondition(
                key="video_id",
                match=models.MatchValue(value=video_id),
            ),
        ],
    )

    segments: List[VideoSegment] = []
    offset = None
    while True:
        # Scroll (no query vector) returns a page of points plus the offset
        # of the next page, or None when the last page has been reached.
        points, offset = qdrant_client.scroll(
            collection_name=COLLECTION_NAME,
            scroll_filter=filter_param,
            limit=10000,
            offset=offset,
            with_payload=True,
        )
        segments.extend(VideoSegment(**point.payload) for point in points)

        # Stop on the last page; the empty-page check guards against looping
        # forever if an offset is ever returned without any points.
        if offset is None or not points:
            break

    # Sort by start time
    segments.sort(key=lambda seg: seg.start)

    return segments
603
+
604
+
605
def get_video_by_id(video_id: str) -> Optional[Video]:
    """Look up a video by ID, falling back to YouTube metadata.

    The processed-videos collection is checked first. A stored video that
    has no title (or only the placeholder ``"Video <id>"``) is passed through
    ``_fetch_youtube_metadata``; a video that is not stored at all is created
    from the ID and passed through the same helper.

    Args:
        video_id: ID to look up.

    Returns:
        The stored or freshly built ``Video``. If anything raises, a minimal
        ``Video`` carrying the ID and a placeholder title is returned.
    """
    import logging

    try:
        id_filter = models.Filter(
            must=[
                models.FieldCondition(
                    key="video_id",
                    match=models.MatchValue(value=video_id),
                ),
            ],
        )

        # Only the first element of the scroll result (the records) is needed.
        records = qdrant_client.scroll(
            collection_name=PROCESSED_VIDEOS_COLLECTION,
            scroll_filter=id_filter,
            limit=1,  # We only need one result
            with_payload=True,
        )[0]

        if not records:
            # Not in the database: build a bare video and enrich it.
            logging.info(
                f"Video {video_id} not found in database, fetching from YouTube"
            )
            return _fetch_youtube_metadata(video_id, Video(video_id=video_id))

        video = Video(**records[0].payload)

        # A missing or placeholder title means metadata was never fetched.
        title_missing = not video.title or video.title == f"Video {video_id}"
        if title_missing:
            video = _fetch_youtube_metadata(video_id, video)
        return video

    except Exception as e:
        logging.error(f"Error getting video by ID {video_id}: {str(e)}")
        # Return a basic video object with just the ID
        return Video(video_id=video_id, title=f"Video {video_id}")
app/static/css/style.css ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/* Application-specific styles layered on top of the UI framework */

/* ---------- Recent-videos carousel ---------- */

/* Each card snaps to the centre of the scroll container. */
.carousel-item {
    scroll-snap-align: center;
}

/* Narrow viewports: cards may shrink, but never below 200px. */
@media (max-width: 640px) {
    .carousel-item {
        min-width: 200px;
    }
}

/* Wider viewports: slightly larger minimum card width. */
@media (min-width: 640px) {
    .carousel-item {
        min-width: 250px;
    }
}

/* Scrolling is driven by the arrow buttons, so native scrollbars are hidden. */
.carousel {
    -ms-overflow-style: none; /* IE / legacy Edge */
    scrollbar-width: none; /* Firefox */
    overflow-x: hidden;
}

.carousel::-webkit-scrollbar {
    display: none; /* Chrome / Safari / Opera */
}

/* Arrow button at either end of the carousel */
.btn-circle.btn-disabled {
    cursor: not-allowed;
    opacity: 0.5;
}

/* Card layout: thumbnail on top, body stretching to fill the rest */
.carousel-item .card {
    height: 100%;
    display: flex;
    flex-direction: column;
}

.carousel-item .card figure {
    flex: 0 0 auto;
    width: 100%;
}

.carousel-item .card .card-body {
    display: flex;
    flex-direction: column;
    flex: 1 0 auto;
}

/* ---------- Transcript ---------- */

/* Scrollable transcript panel */
.transcript-container {
    overflow-y: auto;
    max-height: 500px;
    padding-right: 1rem;
}

/* A single clickable transcript row */
.transcript-segment {
    cursor: pointer;
    margin-bottom: 0.5rem;
    padding: 0.625rem;
    border: 1px solid transparent;
    border-radius: 0.5rem;
    background-color: var(--base-200, #f3f4f6);
    transition: all 0.2s ease;
}

.transcript-segment:hover {
    box-shadow: 0 2px 5px rgba(0, 0, 0, 0.05);
    transform: translateY(-1px);
    background-color: var(--base-300, #e5e7eb);
}

/* Row marked as the current / matching segment */
.transcript-segment.highlight {
    border-left: 3px solid var(--primary, #3b82f6);
    background-color: var(--primary-focus, rgba(59, 130, 246, 0.2));
    box-shadow: 0 2px 8px rgba(0, 0, 0, 0.08);
}

/* Row hidden from view */
.transcript-segment.hidden-segment {
    display: none;
}

/* Pill showing the segment start time */
.timestamp {
    display: inline-block;
    margin-right: 0.5rem;
    padding: 0.125rem 0.5rem;
    border-radius: 9999px;
    font-size: 0.75rem;
    font-weight: bold;
    color: var(--neutral-content, #4b5563);
    background-color: var(--neutral, #e5e7eb);
}

/* ---------- Search results ---------- */

/* Pill showing the relevance score */
.score-badge {
    display: inline-block;
    margin-left: 0.5rem;
    padding: 0.125rem 0.5rem;
    border-radius: 9999px;
    font-size: 0.75rem;
    color: var(--primary-content, white);
    background-color: var(--primary, #3b82f6);
}

/* Result cards lift slightly on hover */
.search-result {
    transition: all 0.2s ease;
}

.search-result:hover {
    transform: translateY(-2px);
}

/* ---------- Metadata tags ---------- */

.metadata-tags {
    display: flex;
    flex-wrap: wrap;
    gap: 0.25rem;
    margin-top: 0.5rem;
}

.metadata-tag {
    white-space: nowrap;
    padding: 0.1rem 0.4rem;
    border-radius: 9999px;
    font-size: 0.7rem;
    color: var(--accent-content, #581c87);
    background-color: var(--accent, #d8b4fe);
}
app/static/js/index.js ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Index page functionality
2
+ document.addEventListener('DOMContentLoaded', () => {
3
+ const youtubeUrlInput = document.getElementById('youtube-url');
4
+ const processButton = document.getElementById('process-button');
5
+ const processStatus = document.getElementById('process-status');
6
+ const processingIndicator = document.getElementById('processing');
7
+ const recentlyProcessedCard = document.getElementById('recently-processed');
8
+ const videoListContainer = document.getElementById('video-list');
9
+
10
+ // Example video buttons
11
+ const exampleButtons = document.querySelectorAll('.example-video');
12
+
13
+ // Process button click handler
14
+ processButton.addEventListener('click', () => processVideo());
15
+
16
+ // Enter key in input field
17
+ youtubeUrlInput.addEventListener('keypress', (e) => {
18
+ if (e.key === 'Enter') processVideo();
19
+ });
20
+
21
+ // Example video buttons
22
+ exampleButtons.forEach(button => {
23
+ button.addEventListener('click', () => {
24
+ youtubeUrlInput.value = button.dataset.url;
25
+ processVideo();
26
+ });
27
+ });
28
+
29
+ // Process video function
30
/**
 * Submit the URL in the input field for processing and report the outcome.
 *
 * Validates the URL client-side, shows a spinner (replaced by a
 * "taking longer than expected" notice after 20 seconds), POSTs the URL to
 * /api/video/process, then renders a success alert linking to the video
 * page or an error alert. Both recent-video lists are refreshed on success.
 */
function processVideo() {
    const youtubeUrl = youtubeUrlInput.value.trim();
    if (!youtubeUrl) {
        processStatus.innerHTML = '<div class="alert alert-warning">Please enter a YouTube URL</div>';
        return;
    }

    // Client-side sanity check only; the server receives the full URL.
    if (!extractVideoId(youtubeUrl)) {
        processStatus.innerHTML = '<div class="alert alert-error">Invalid YouTube URL</div>';
        return;
    }

    // Show loading indicator with spinner and text
    processStatus.innerHTML = `
        <div class="flex items-center justify-center my-4">
            <span class="loading loading-spinner loading-md text-primary"></span>
            <span class="ml-2">Processing video... This may take a few moments</span>
        </div>
    `;

    // Swap the spinner for a "still working" notice if the request is slow
    const timeoutId = setTimeout(() => {
        processStatus.innerHTML = `
            <div class="alert alert-warning">
                <svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24">
                    <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 9v2m0 4h.01m-6.938 4h13.856c1.54 0 2.502-1.667 1.732-3L13.732 4c-.77-1.333-2.694-1.333-3.464 0L3.34 16c-.77 1.333.192 3 1.732 3z" />
                </svg>
                <span>Processing is taking longer than expected. Please wait...</span>
            </div>
        `;
    }, 20000); // 20 seconds

    // Send request to process the video
    fetch('/api/video/process', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json'
        },
        body: JSON.stringify({ url: youtubeUrl })
    })
        .then(response => {
            if (!response.ok) {
                throw new Error('Failed to process video');
            }
            return response.json();
        })
        .then(data => {
            // The request finished; the slow-request notice must not fire
            clearTimeout(timeoutId);

            // Extract video ID from response (handles both old and new API formats)
            const processedId = data.video ? data.video.video_id : data.video_id;
            const isNewlyProcessed = data.newly_processed !== undefined ? data.newly_processed : true;

            if (!processedId) {
                throw new Error('Invalid response: Missing video ID');
            }

            // Log for debugging
            console.log('Process response:', {videoId: processedId, isNewlyProcessed, data});

            // The ID comes from the server response, so encode it before it
            // is placed inside an href attribute.
            const videoHref = `/video/${encodeURIComponent(processedId)}`;

            // Show success message
            processStatus.innerHTML = `
                <div role="alert" class="alert alert-success">
                    <svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24">
                        <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 12l2 2 4-4m6 2a9 9 0 11-18 0 9 9 0 0118 0z" />
                    </svg>
                    <span>${isNewlyProcessed ? 'Video processed successfully!' : 'Video was already processed!'}</span>
                    <div>
                        <a href="${videoHref}" class="btn btn-sm btn-primary">
                            <svg xmlns="http://www.w3.org/2000/svg" class="h-4 w-4 mr-1" fill="none" viewBox="0 0 24 24" stroke="currentColor">
                                <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M14.752 11.168l-3.197-2.132A1 1 0 0010 9.87v4.263a1 1 0 001.555.832l3.197-2.132a1 1 0 000-1.664z" />
                                <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
                            </svg>
                            Open Video
                        </a>
                    </div>
                </div>
            `;

            // Update recent videos lists
            displayRecentVideos();
            loadFooterRecentVideos(); // Update footer videos as well
        })
        .catch(error => {
            // Also reached when the response handler above throws
            clearTimeout(timeoutId);

            // Show error message
            console.error('Process error:', error);
            processStatus.innerHTML = handleError(error);
        });
}
128
+
129
+ // Display recently processed videos
130
/**
 * Fetch up to five recently processed videos and render them as a carousel.
 *
 * Shows a loading state, then either the carousel (with prev/next arrows
 * when there is more than one video), nothing (card hidden when the list is
 * empty), or an error alert when the request fails.
 *
 * This function runs again after every successful processing, so the arrow
 * handlers are assigned via `onclick` (replacing the previous ones) rather
 * than stacked with addEventListener, and both button states are recomputed.
 * NOTE(review): this assumes the arrow buttons carry no inline onclick of
 * their own in the template — confirm.
 */
function displayRecentVideos() {
    // Escape a value for interpolation into HTML text or attribute values.
    // Video titles originate outside this application.
    const escapeHtml = (value) => String(value).replace(
        /[&<>"']/g,
        (ch) => ({ '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' }[ch])
    );

    // Show loading state
    recentlyProcessedCard.classList.remove('hidden');
    videoListContainer.innerHTML = `
        <div class="flex justify-center items-center p-4">
            <span class="loading loading-spinner loading-md"></span>
            <span class="ml-2">Loading recent videos...</span>
        </div>
    `;

    const carouselPrev = document.getElementById('carousel-prev');
    const carouselNext = document.getElementById('carousel-next');

    const hideArrows = () => {
        carouselPrev.classList.add('hidden');
        carouselNext.classList.add('hidden');
    };

    // Fetch recent videos from server
    fetch('/api/video/recent?limit=5')
        .then(response => {
            if (!response.ok) {
                throw new Error('Failed to fetch recent videos');
            }
            return response.json();
        })
        .then(videos => {
            if (!videos || videos.length === 0) {
                recentlyProcessedCard.classList.add('hidden');
                hideArrows();
                return;
            }

            // Limit to 5 videos
            const limitedVideos = videos.slice(0, 5);

            // Generate carousel items
            videoListContainer.innerHTML = limitedVideos.map((video, index) => {
                // created_at is treated as a Unix timestamp in seconds
                let formattedDate = '';
                if (video.created_at) {
                    formattedDate = new Date(video.created_at * 1000).toLocaleDateString();
                }

                const safeTitle = escapeHtml(video.title || `Video ${video.video_id}`);
                const safeId = encodeURIComponent(video.video_id);

                return `
                    <div id="video-${index}" class="carousel-item">
                        <a href="/video/${safeId}" class="card bg-base-100 shadow-sm hover:shadow-md transition-all w-64 md:w-72 flex flex-col">
                            <figure class="w-full h-36 overflow-hidden">
                                <img src="https://img.youtube.com/vi/${safeId}/mqdefault.jpg" alt="Thumbnail" class="w-full h-full object-cover">
                            </figure>
                            <div class="card-body p-3">
                                <h3 class="card-title text-sm line-clamp-2">${safeTitle}</h3>
                                <div class="text-xs opacity-70">${escapeHtml(formattedDate)}</div>
                            </div>
                        </a>
                    </div>
                `;
            }).join('');

            if (limitedVideos.length <= 1) {
                // Nothing to navigate between
                hideArrows();
                return;
            }

            let currentIndex = 0;
            const maxIndex = limitedVideos.length - 1;

            // Show navigation arrows
            carouselPrev.classList.remove('hidden');
            carouselNext.classList.remove('hidden');

            const prevButton = carouselPrev.querySelector('button');
            const nextButton = carouselNext.querySelector('button');

            // Disable whichever arrow cannot move any further
            const updateButtonStates = () => {
                prevButton.classList.toggle('btn-disabled', currentIndex === 0);
                nextButton.classList.toggle('btn-disabled', currentIndex === maxIndex);
            };

            const showVideo = (index) => {
                currentIndex = index;
                document.getElementById(`video-${index}`).scrollIntoView({
                    behavior: 'smooth',
                    block: 'nearest',
                    inline: 'center'
                });
                updateButtonStates();
            };

            // Assignment replaces handlers installed by an earlier refresh,
            // so one click always moves exactly one card.
            prevButton.onclick = () => {
                if (currentIndex > 0) showVideo(currentIndex - 1);
            };
            nextButton.onclick = () => {
                if (currentIndex < maxIndex) showVideo(currentIndex + 1);
            };

            // Start at the first card; also clears a stale disabled state
            // left on the next button by a previous render.
            updateButtonStates();
        })
        .catch(error => {
            console.error('Error fetching recent videos:', error);
            videoListContainer.innerHTML = `
                <div class="alert alert-error">
                    <svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24">
                        <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 14l2-2m0 0l2-2m-2 2l-2-2m2 2l2 2m7-2a9 9 0 11-18 0 9 9 0 0118 0z" />
                    </svg>
                    <span>Failed to load recent videos</span>
                </div>
            `;
            hideArrows();
        });
}
265
+
266
+ // Display recent videos on page load
267
+ displayRecentVideos();
268
+ });
app/static/js/main.js ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Common functionality
2
+
3
+ // Initialize on page load
4
document.addEventListener('DOMContentLoaded', () => {
    // Single place that switches the page theme
    const applyTheme = (theme) => {
        document.documentElement.setAttribute('data-theme', theme);
    };

    // Populate the footer's recent-videos list
    loadFooterRecentVideos();

    // Theme picker: apply the chosen theme and remember it
    for (const item of document.querySelectorAll('.theme-item')) {
        item.addEventListener('click', () => {
            const { theme } = item.dataset;
            applyTheme(theme);
            localStorage.setItem('theme', theme);
        });
    }

    // Restore the theme chosen on a previous visit, if any
    const savedTheme = localStorage.getItem('theme');
    if (savedTheme) {
        applyTheme(savedTheme);
    }
});
24
+
25
+ // Format seconds to MM:SS format
26
/**
 * Format a duration in seconds as zero-padded MM:SS.
 * Minutes are not wrapped at 60, so 3725 seconds renders as "62:05".
 */
function formatTime(seconds) {
    const pad = (value) => value.toString().padStart(2, '0');
    const wholeMinutes = Math.floor(seconds / 60);
    const remainingSeconds = Math.floor(seconds % 60);
    return `${pad(wholeMinutes)}:${pad(remainingSeconds)}`;
}
31
+
32
+ // Error handling function
33
/**
 * Log an error and return the HTML for an error alert with a Retry button.
 * Falls back to a generic message when the error carries no message.
 */
function handleError(error) {
    console.error('Error:', error);

    const description = error.message || 'Something went wrong';

    return `<div role="alert" class="alert alert-error">
        <svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24">
            <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 14l2-2m0 0l2-2m-2 2l-2-2m2 2l2 2m7-2a9 9 0 11-18 0 9 9 0 0118 0z" />
        </svg>
        <span>Error: ${description}</span>
        <div>
            <button class="btn btn-sm btn-ghost" onclick="window.location.reload()">Retry</button>
        </div>
    </div>`;
}
45
+
46
+ // Toast notification function
47
/**
 * Show a dismissible toast in the bottom-right corner.
 *
 * `type` selects both the alert class (`alert-<type>`) and the icon;
 * anything other than 'success', 'warning' or 'error' gets the info icon.
 * The toast fades out and removes itself after 3 seconds.
 */
function showToast(message, type = 'info') {
    const infoIcon = `<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" class="stroke-current shrink-0 w-6 h-6">
        <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"></path>
    </svg>`;

    // Icons for the non-default toast types
    const iconsByType = new Map([
        ['success', `<svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24">
        <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 12l2 2 4-4m6 2a9 9 0 11-18 0 9 9 0 0118 0z" />
    </svg>`],
        ['warning', `<svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24">
        <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 9v2m0 4h.01m-6.938 4h13.856c1.54 0 2.502-1.667 1.732-3L13.732 4c-.77-1.333-2.694-1.333-3.464 0L3.34 16c-.77 1.333.192 3 1.732 3z" />
    </svg>`],
        ['error', `<svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24">
        <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 14l2-2m0 0l2-2m-2 2l-2-2m2 2l2 2m7-2a9 9 0 11-18 0 9 9 0 0118 0z" />
    </svg>`],
    ]);
    const icon = iconsByType.has(type) ? iconsByType.get(type) : infoIcon;

    const toast = document.createElement('div');
    toast.className = `alert alert-${type} fixed bottom-4 right-4 max-w-xs z-50 shadow-lg`;
    toast.innerHTML = `
        ${icon}
        <span>${message}</span>
        <div>
            <button class="btn btn-sm btn-ghost" onclick="this.parentElement.parentElement.remove()">
                <svg xmlns="http://www.w3.org/2000/svg" class="h-4 w-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
                    <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M6 18L18 6M6 6l12 12" />
                </svg>
            </button>
        </div>
    `;
    document.body.appendChild(toast);

    // Auto-dismiss: start the fade after 3 s, remove once it has finished
    const fadeOutAndRemove = () => {
        toast.classList.add('opacity-0', 'transition-opacity', 'duration-500');
        setTimeout(() => toast.remove(), 500);
    };
    setTimeout(fadeOutAndRemove, 3000);
}
94
+
95
+ // Extract video ID from YouTube URL
96
/**
 * Extract the 11-character video ID from a YouTube URL.
 *
 * Recognised forms: a `v=` query parameter in any position
 * (`watch?v=ID`, `watch?feature=share&v=ID`), `youtu.be/ID`,
 * `/embed/ID`, `/v/ID`, `/shorts/ID`, `/live/ID` and `/u/<x>/ID`.
 *
 * @param {string} url - URL to inspect.
 * @returns {string|null} The video ID, or null when no ID of exactly
 *     11 ID characters (letters, digits, `_`, `-`) is found.
 */
function extractVideoId(url) {
    if (typeof url !== 'string') {
        return null;
    }

    // Exactly 11 ID characters, not followed by a 12th one
    const id = '([A-Za-z0-9_-]{11})(?![A-Za-z0-9_-])';
    const patterns = [
        new RegExp(`[?&]v=${id}`),
        new RegExp(`youtu\\.be/${id}`),
        new RegExp(`/(?:embed|v|shorts|live)/${id}`),
        new RegExp(`/u/\\w/${id}`),
    ];

    for (const pattern of patterns) {
        const match = url.match(pattern);
        if (match) {
            return match[1];
        }
    }
    return null;
}
101
+
102
+ // Load recent videos into the footer from the API
103
/**
 * Fill the footer's recent-videos list from /api/video/recent.
 *
 * Does nothing on pages without a #footer-recent-videos element. Links are
 * built as DOM nodes and titles are set as text, so a title containing
 * markup is displayed literally instead of being parsed as HTML.
 */
function loadFooterRecentVideos() {
    const footerRecentVideos = document.getElementById('footer-recent-videos');
    if (!footerRecentVideos) return;

    // Show loading state
    footerRecentVideos.innerHTML = '<p class="text-sm opacity-70">Loading recent videos...</p>';

    // Fetch recent videos from server API
    fetch('/api/video/recent?limit=3')
        .then(response => {
            if (!response.ok) {
                throw new Error('Failed to fetch recent videos');
            }
            return response.json();
        })
        .then(videos => {
            if (!videos || videos.length === 0) {
                footerRecentVideos.innerHTML = '<p class="text-sm opacity-70">No recent videos</p>';
                return;
            }

            const links = document.createDocumentFragment();
            for (const video of videos) {
                const link = document.createElement('a');
                link.href = `/video/${encodeURIComponent(video.video_id)}`;
                link.className = 'link link-hover block py-1 truncate';

                const marker = document.createElement('span');
                marker.className = 'text-xs text-primary';
                marker.textContent = '▶';

                link.appendChild(marker);
                // Text node: the title is never interpreted as HTML
                link.appendChild(
                    document.createTextNode(` ${video.title || `Video ${video.video_id}`}`)
                );
                links.appendChild(link);
            }

            // Replace the loading placeholder with the links
            footerRecentVideos.innerHTML = '';
            footerRecentVideos.appendChild(links);
        })
        .catch(error => {
            console.error('Error loading footer videos:', error);
            footerRecentVideos.innerHTML = '<p class="text-sm opacity-70">Failed to load recent videos</p>';
        });
}
app/static/js/video.js ADDED
@@ -0,0 +1,440 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Video page functionality
2
+ document.addEventListener('DOMContentLoaded', () => {
3
+ const playerElement = document.getElementById('youtube-player');
4
+ const searchInput = document.getElementById('search-input');
5
+ const searchButton = document.getElementById('search-button');
6
+ const transcriptContainer = document.getElementById('transcript-container');
7
+ const loadingIndicator = document.getElementById('loading');
8
+ const toggleTranscriptButton = document.getElementById('toggle-transcript');
9
+
10
+ let transcriptSegments = [];
11
+ let ytPlayer = null;
12
+ let isProcessingUrl = false;
13
+
14
+ // Check if there's a search query in the URL
15
+ const urlParams = new URLSearchParams(window.location.search);
16
+ const searchQuery = urlParams.get('q');
17
+ const processingUrl = urlParams.get('processing');
18
+
19
+ // Format time to display as HH:MM:SS
20
/**
 * Format a position in seconds as M:SS, or H:MM:SS once it reaches an hour.
 * The leading unit is not zero-padded.
 */
function formatTime(seconds) {
    const twoDigits = (value) => value.toString().padStart(2, '0');

    const hours = Math.floor(seconds / 3600);
    const mins = Math.floor((seconds % 3600) / 60);
    const secs = Math.floor(seconds % 60);

    // Minutes are padded only when an hours field precedes them
    return hours > 0
        ? `${hours}:${twoDigits(mins)}:${twoDigits(secs)}`
        : `${mins}:${twoDigits(secs)}`;
}
31
+
32
+ // Handle error display
33
/**
 * Log an error to the console and return the HTML for a simple error alert.
 */
function handleError(error) {
    console.error(error);
    const { message } = error;
    return `<div class="alert alert-error">Error: ${message}</div>`;
}
37
+
38
+ // Initialize YouTube iframe API
39
/**
 * Attach a YouTube player object to the existing player iframe.
 *
 * When the iframe API is already present the player is created right away;
 * otherwise the API script is injected and the player is created from the
 * API's global ready callback.
 */
function initYouTubePlayer() {
    // The player is bound to the iframe through its element ID
    const iframeId = playerElement.getAttribute('id');

    if (window.YT) {
        createYouTubePlayer(iframeId);
        return;
    }

    // API not loaded yet: inject its script ahead of the first script tag
    const apiScript = document.createElement('script');
    apiScript.src = 'https://www.youtube.com/iframe_api';
    const [firstScript] = document.getElementsByTagName('script');
    firstScript.parentNode.insertBefore(apiScript, firstScript);

    // Global hook registered for when the API script has finished loading
    window.onYouTubeIframeAPIReady = function() {
        createYouTubePlayer(iframeId);
    };
}
57
+
58
+ // Create YouTube player object
59
/**
 * Create the YT.Player for the iframe with the given element ID and keep
 * it in `ytPlayer` for later seeking.
 */
function createYouTubePlayer(iframeId) {
    const playerOptions = {
        events: {
            'onReady': onPlayerReady
        }
    };
    ytPlayer = new YT.Player(iframeId, playerOptions);
}
66
+
67
+ // When player is ready
68
/**
 * onReady callback for the YouTube player; currently only logs that the
 * player is ready. The event argument is not used.
 */
function onPlayerReady(event) {
    console.log('Player ready');
}
71
+
72
+ // Load transcript segments
73
/**
 * Fetch the segments for the current video and render the transcript.
 *
 * Shows a spinner while loading. Renders an error alert for an unusable
 * video ID or a failed request, an info alert when the server returns no
 * segments, and the transcript itself otherwise.
 */
function loadTranscript() {
    const render = (html) => {
        transcriptContainer.innerHTML = html;
    };

    render('<div class="flex justify-center my-4"><span class="loading loading-spinner loading-md"></span><span class="ml-2">Loading transcript...</span></div>');

    // Reject IDs that are missing or are the stringified 'undefined' / 'null'
    const hasUsableId = videoId && videoId !== 'undefined' && videoId !== 'null';
    if (!hasUsableId) {
        render(`
            <div class="alert alert-error">
                <div>
                    <svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 14l2-2m0 0l2-2m-2 2l-2-2m2 2l2 2m7-2a9 9 0 11-18 0 9 9 0 0118 0z" /></svg>
                    <span>Invalid video ID. Please return to the home page and select a valid video.</span>
                </div>
            </div>
        `);
        return;
    }

    const onSegments = (segments) => {
        // Keep the loaded segments for the rest of the page
        transcriptSegments = segments;

        if (segments && segments.length !== 0) {
            displayTranscript(segments);
            return;
        }

        render(`
            <div class="alert alert-info">
                <div>
                    <svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" class="stroke-current shrink-0 w-6 h-6"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"></path></svg>
                    <span>No transcript available for this video. Try processing the video first from the home page.</span>
                </div>
            </div>
        `);
    };

    const onFailure = (error) => {
        console.error('Error loading transcript:', error);
        render(`
            <div class="alert alert-error">
                <div>
                    <svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 14l2-2m0 0l2-2m-2 2l-2-2m2 2l2 2m7-2a9 9 0 11-18 0 9 9 0 0118 0z" /></svg>
                    <span>Error loading transcript: ${error.message}</span>
                </div>
            </div>
            <p class="mt-4">This may happen if:</p>
            <ul class="list-disc ml-8 mt-2">
                <li>The video hasn't been processed yet</li>
                <li>The video ID is incorrect</li>
                <li>The server is experiencing issues</li>
            </ul>
            <p class="mt-4">Try processing this video from the home page first.</p>
        `);
    };

    fetch(`/api/video/segments/${videoId}`)
        .then(response => {
            if (!response.ok) {
                throw new Error('Failed to load transcript: ' + response.status);
            }
            return response.json();
        })
        .then(onSegments)
        .catch(onFailure);
}
131
+
132
+ // Display transcript segments
133
/**
 * Render transcript segments as clickable rows inside the transcript
 * container; clicking a row seeks the player to that segment's start.
 *
 * Rows are built as DOM nodes and the segment text is set via textContent,
 * so transcript text containing `<`, `>` or `&` is shown literally instead
 * of being parsed as HTML. Each row keeps the `transcript-segment` class
 * and the `data-start` / `data-end` / `data-index` attributes.
 *
 * @param {Array<{start: number, end: number, text: string}>} segments
 */
function displayTranscript(segments) {
    const rows = document.createDocumentFragment();

    segments.forEach((segment, index) => {
        const row = document.createElement('div');
        row.className = 'transcript-segment';
        row.dataset.start = segment.start;
        row.dataset.end = segment.end;
        row.dataset.index = index;

        const timestamp = document.createElement('span');
        timestamp.className = 'timestamp';
        timestamp.textContent = formatTime(segment.start);

        const text = document.createElement('span');
        text.className = 'segment-text';
        text.textContent = segment.text;

        row.appendChild(timestamp);
        // Whitespace between the two inline elements, as in the markup form
        row.appendChild(document.createTextNode(' '));
        row.appendChild(text);

        // Seek to this segment's start when the row is clicked
        row.addEventListener('click', () => {
            seekToTime(parseFloat(row.dataset.start));
        });

        rows.appendChild(row);
    });

    // Replace whatever was shown before (spinner or an older transcript)
    transcriptContainer.innerHTML = '';
    transcriptContainer.appendChild(rows);
}
155
+
156
/**
 * Move the YouTube player to the given position, try to start playback and
 * highlight the matching transcript row.
 *
 * Does nothing except log an error when the player object is not available
 * yet or does not expose `seekTo`.
 *
 * @param {number|string} seconds Target position in seconds.
 */
function seekToTime(seconds) {
    console.log('Seeking to time:', seconds);

    const playerReady = ytPlayer && typeof ytPlayer.seekTo === 'function';
    if (!playerReady) {
        console.error('YouTube player is not ready yet or seekTo method is not available');
        return;
    }

    try {
        // Callers may hand over a string (e.g. a data attribute).
        const target = parseFloat(seconds);

        ytPlayer.seekTo(target, true);

        // Playback may be refused by browser autoplay policies; that must
        // not prevent the highlight below.
        try {
            ytPlayer.playVideo();
        } catch (e) {
            console.warn('Could not autoplay video:', e);
        }

        highlightSegment(target);
    } catch (error) {
        console.error('Error seeking to time:', error);
    }
}
184
+
185
/**
 * Mark the transcript row that covers `time` with the `highlight` class and
 * scroll it into view.
 *
 * Any previous highlight is removed first. The row is chosen as the first
 * one whose [data-start, data-end] range contains `time`; if no range
 * matches, a row whose `data-start` attribute equals `time` exactly is used
 * instead. When neither lookup succeeds nothing is highlighted.
 *
 * @param {number} time Playback position in seconds.
 */
function highlightSegment(time) {
    const rows = Array.from(document.querySelectorAll('.transcript-segment'));

    rows.forEach(row => row.classList.remove('highlight'));

    const containing = rows.find(row => {
        const start = parseFloat(row.dataset.start);
        const end = parseFloat(row.dataset.end);
        return time >= start && time <= end;
    });

    // Fallback covers rows whose end time is missing or unparsable.
    const target = containing
        || document.querySelector(`.transcript-segment[data-start="${time}"]`);

    if (!target) {
        return;
    }

    target.classList.add('highlight');
    target.scrollIntoView({ behavior: 'smooth', block: 'center' });
}
220
+
221
// Search functionality: run a search from the button or the Enter key.
searchButton.addEventListener('click', performSearch);
searchInput.addEventListener('keypress', e => {
    if (e.key === 'Enter') performSearch();
});

/**
 * Run a semantic search for the text in the search box against the current
 * video and show the matches in the transcript container.
 *
 * Flow: validate the query and the video ID, show the loading indicator,
 * call `/api/video/search`, then either render a "no results" notice or the
 * filtered transcript with a header offering to show the full transcript
 * again. Failures are rendered through `handleError`.
 *
 * The query is user-controlled (it can also arrive via the page URL), so it
 * is HTML-escaped before being placed into any markup, and both request
 * parameters are URL-encoded.
 */
function performSearch() {
    // Escape a value for safe use inside HTML text or attribute content.
    const escapeHtml = value => String(value).replace(/[&<>"']/g, ch => ({
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;',
    }[ch]));

    // Wire the "Show all transcript" link that was just rendered.
    const bindResetLink = () => {
        const resetLink = transcriptContainer.querySelector('#reset-search');
        if (!resetLink) {
            return;
        }
        resetLink.addEventListener('click', (e) => {
            e.preventDefault();
            resetTranscriptFilter();
            displayTranscript(transcriptSegments);
        });
    };

    const query = searchInput.value.trim();
    if (!query) {
        transcriptContainer.innerHTML = '<div class="alert alert-warning">Please enter a search query</div>';
        return;
    }

    // Validate video ID before searching
    if (!videoId || videoId === 'undefined' || videoId === 'null') {
        transcriptContainer.innerHTML = `
            <div class="alert alert-error">
                <div>
                    <svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 14l2-2m0 0l2-2m-2 2l-2-2m2 2l2 2m7-2a9 9 0 11-18 0 9 9 0 0118 0z" /></svg>
                    <span>Invalid video ID. Please return to the home page and select a valid video.</span>
                </div>
            </div>
        `;
        return;
    }

    const safeQuery = escapeHtml(query);

    // Show loading indicator
    loadingIndicator.classList.remove('hidden');

    // Send search request
    fetch(`/api/video/search?query=${encodeURIComponent(query)}&video_id=${encodeURIComponent(videoId)}`)
        .then(response => {
            if (!response.ok) {
                throw new Error('Search failed');
            }
            return response.json();
        })
        .then(results => {
            // Hide loading indicator
            loadingIndicator.classList.add('hidden');

            if (results.length === 0) {
                // Show "no results" message in transcript container
                transcriptContainer.innerHTML = `
                    <div role="alert" class="alert alert-info">
                        <svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" class="stroke-current shrink-0 w-6 h-6">
                            <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"></path>
                        </svg>
                        <span>No results found for "${safeQuery}". <a href="#" id="reset-search" class="link link-primary">Show all transcript</a></span>
                    </div>`;
                bindResetLink();
                return;
            }

            // Display search results as filtered transcript
            filterTranscript(results);

            // Add a header with search info and reset option
            const searchInfoHeader = document.createElement('div');
            searchInfoHeader.className = 'mb-4 flex justify-between items-center';
            searchInfoHeader.innerHTML = `
                <div class="badge badge-accent">${results.length} results for "${safeQuery}"</div>
                <a href="#" id="reset-search" class="link link-primary text-sm">Show all transcript</a>
            `;

            // Insert the header before transcript segments
            transcriptContainer.insertBefore(searchInfoHeader, transcriptContainer.firstChild);
            bindResetLink();
        })
        .catch(error => {
            // Hide loading indicator
            loadingIndicator.classList.add('hidden');

            // Show error
            transcriptContainer.innerHTML = handleError(error);
        });
}
311
+
312
/**
 * Replace the transcript view with the segments returned by a search.
 *
 * Every result becomes a clickable `.transcript-segment.search-result` row
 * showing the timestamp, a "<score>% match" badge (score × 100, rounded)
 * and the segment text. `data-index` holds the position of the segment in
 * the full transcript, or -1 when it cannot be found by `segment_id`.
 *
 * Rows are built with DOM APIs and `textContent`, so segment text from the
 * API is rendered literally and cannot inject markup.
 *
 * @param {Array<{score: number, segment: {segment_id: *, start: number,
 *     end: number, text: string}}>} results Search hits, in display order.
 */
function filterTranscript(results) {
    const fragment = document.createDocumentFragment();

    results.forEach(result => {
        const segment = result.segment;
        const score = (result.score * 100).toFixed(0);
        const index = transcriptSegments.findIndex(s => s.segment_id === segment.segment_id);

        const row = document.createElement('div');
        row.className = 'transcript-segment search-result';
        row.dataset.start = segment.start;
        row.dataset.end = segment.end;
        row.dataset.index = index;

        const header = document.createElement('div');
        header.className = 'flex justify-between items-center';

        const timestamp = document.createElement('span');
        timestamp.className = 'timestamp';
        timestamp.textContent = formatTime(segment.start);

        const badge = document.createElement('div');
        badge.className = 'badge badge-primary';
        badge.textContent = `${score}% match`;

        header.append(timestamp, badge);

        const text = document.createElement('span');
        text.className = 'segment-text mt-1';
        text.textContent = segment.text;

        row.append(header, text);

        row.addEventListener('click', () => {
            seekToTime(parseFloat(row.dataset.start));
        });

        fragment.appendChild(row);
    });

    // Replace transcript with filtered results
    transcriptContainer.innerHTML = '';
    transcriptContainer.appendChild(fragment);
}
343
+
344
+ // Transcript is always visible - toggle functionality removed
345
+
346
/**
 * Clear the search box.
 *
 * This only resets the input field; callers re-render the full transcript
 * themselves afterwards.
 */
function resetTranscriptFilter() {
    searchInput.value = '';
}
350
+
351
/**
 * When the page was opened right after submitting a URL for processing
 * (`processingUrl === 'true'`), show a spinner and poll the segments
 * endpoint until the transcript is available.
 *
 * Polling runs every 2 seconds and gives up after 2 minutes with a warning.
 * As soon as segments are returned the transcript is loaded through
 * `loadTranscript()`. Both timers are cancelled and `isProcessingUrl` is
 * reset whenever polling ends, so a late timeout can never overwrite a
 * message that has already been shown.
 *
 * @returns {boolean} `true` when this function took over rendering of the
 *     transcript container (caller must not load the transcript itself),
 *     `false` when the page is not in "just processed" mode.
 */
function showProcessingIndicator() {
    if (processingUrl !== 'true') {
        return false;
    }

    const POLL_INTERVAL_MS = 2000;
    const POLL_TIMEOUT_MS = 120000;

    // Validate video ID before starting to poll the API at all.
    if (!videoId || videoId === 'undefined' || videoId === 'null') {
        isProcessingUrl = false;
        transcriptContainer.innerHTML = `
            <div class="alert alert-error">
                <div>
                    <svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 14l2-2m0 0l2-2m-2 2l-2-2m2 2l2 2m7-2a9 9 0 11-18 0 9 9 0 0118 0z" /></svg>
                    <span>Invalid video ID. Please return to the home page and select a valid video.</span>
                </div>
            </div>
        `;
        return true;
    }

    isProcessingUrl = true;
    transcriptContainer.innerHTML = `
        <div class="flex items-center justify-center my-4">
            <span class="loading loading-spinner loading-md text-primary"></span>
            <span class="ml-2">Processing video from URL... This may take a few moments</span>
        </div>
    `;

    let pollTimer = null;
    let giveUpTimer = null;

    // End polling for good: cancel both timers and clear the flag.
    const stopPolling = () => {
        clearInterval(pollTimer);
        clearTimeout(giveUpTimer);
        isProcessingUrl = false;
    };

    // Check for segments every 2 seconds
    pollTimer = setInterval(() => {
        fetch(`/api/video/segments/${encodeURIComponent(videoId)}`)
            // A non-OK response just means "not ready yet"; keep polling.
            .then(response => (response.ok ? response.json() : null))
            .then(segments => {
                if (segments && segments.length > 0) {
                    stopPolling();
                    loadTranscript();
                }
            })
            .catch(error => {
                console.error('Error checking segments:', error);
            });
    }, POLL_INTERVAL_MS);

    // Stop checking after 2 minutes
    giveUpTimer = setTimeout(() => {
        if (!isProcessingUrl) {
            return;
        }
        stopPolling();
        transcriptContainer.innerHTML = `
            <div class="alert alert-warning">
                <div>
                    <svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" class="stroke-current shrink-0 w-6 h-6"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"></path></svg>
                    <span>Processing is taking longer than expected. Refresh the page to check progress.</span>
                </div>
            </div>
        `;
    }, POLL_TIMEOUT_MS);

    return true;
}
417
+
418
// Start-up sequence: create the player, then either wait for processing to
// finish or load the transcript straight away.
initYouTubePlayer();

if (showProcessingIndicator() === false) {
    loadTranscript();
}

// A search query passed in the URL is applied once the transcript has
// arrived: check twice a second and stop trying after 10 seconds.
if (searchQuery) {
    const pendingSearchTimer = setInterval(() => {
        const transcriptLoaded = transcriptSegments.length > 0;
        if (!transcriptLoaded) {
            return;
        }

        clearInterval(pendingSearchTimer);

        // Put the query into the search box and run it.
        searchInput.value = searchQuery;
        performSearch();
    }, 500);

    setTimeout(() => {
        clearInterval(pendingSearchTimer);
    }, 10000);
}
440
+ });
app/templates/base.html ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en" data-theme="light">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>{{ title }}</title>
7
+ <link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/full.css" rel="stylesheet" type="text/css" />
8
+ <script src="https://cdn.tailwindcss.com"></script>
9
+ <link rel="stylesheet" href="{{ url_for('static', path='/css/style.css') }}">
10
+ </head>
11
+ <body class="min-h-screen flex flex-col">
12
+ <!-- Header/Navbar -->
13
+ <div class="navbar bg-base-200 shadow-md">
14
+ <div class="navbar-start">
15
+ <a href="/" class="btn btn-ghost text-xl">
16
+ <svg xmlns="http://www.w3.org/2000/svg" class="h-6 w-6 mr-2" fill="none" viewBox="0 0 24 24" stroke="currentColor">
17
+ <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M15 10l4.553-2.276A1 1 0 0121 8.618v6.764a1 1 0 01-1.447.894L15 14M5 18h8a2 2 0 002-2V8a2 2 0 00-2-2H5a2 2 0 00-2 2v8a2 2 0 002 2z" />
18
+ </svg>
19
+ In-Video Search
20
+ </a>
21
+ </div>
22
+ <div class="navbar-center">
23
+ <div class="form-control">
24
+ <div class="join">
25
+ <input type="text" id="global-search" placeholder="Search videos..." class="input input-bordered join-item w-full md:w-96" />
26
+ <button id="global-search-button" class="btn btn-primary join-item">
27
+ <svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5" fill="none" viewBox="0 0 24 24" stroke="currentColor">
28
+ <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M21 21l-6-6m2-5a7 7 0 11-14 0 7 7 0 0114 0z" />
29
+ </svg>
30
+ </button>
31
+ </div>
32
+ </div>
33
+ </div>
34
+ <div class="navbar-end">
35
+ <div class="dropdown dropdown-end">
36
+ <div tabindex="0" role="button" class="btn btn-ghost btn-circle">
37
+ <svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5" fill="none" viewBox="0 0 24 24" stroke="currentColor">
38
+ <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 3v1m0 16v1m9-9h-1M4 12H3m15.364 6.364l-.707-.707M6.343 6.343l-.707-.707m12.728 0l-.707.707M6.343 17.657l-.707.707M16 12a4 4 0 11-8 0 4 4 0 018 0z" />
39
+ </svg>
40
+ </div>
41
+ <ul tabindex="0" class="dropdown-content z-[1] menu p-2 shadow bg-base-100 rounded-box w-52">
42
+ <li><button class="theme-item" data-theme="light">Light</button></li>
43
+ <li><button class="theme-item" data-theme="dark">Dark</button></li>
44
+ <li><button class="theme-item" data-theme="cupcake">Cupcake</button></li>
45
+ <li><button class="theme-item" data-theme="synthwave">Synthwave</button></li>
46
+ </ul>
47
+ </div>
48
+ </div>
49
+ </div>
50
+
51
+ <!-- Main Content -->
52
+ <main class="container mx-auto px-4 py-8 flex-grow">
53
+ {% block content %}{% endblock %}
54
+ </main>
55
+
56
+ <!-- Footer -->
57
+ <footer class="footer p-10 bg-base-200 text-base-content">
58
+ <div>
59
+ <span class="footer-title">In-Video Search</span>
60
+ <p>Powered by Qdrant & FastAPI</p>
61
+ <p>Search through video content semantically</p>
62
+ </div>
63
+ <div>
64
+ <span class="footer-title">Recent Videos</span>
65
+ <div id="footer-recent-videos">
66
+ <!-- Recent videos will be loaded here by JavaScript -->
67
+ <p class="text-sm opacity-70">No recent videos</p>
68
+ </div>
69
+ </div>
70
+ <div>
71
+ <span class="footer-title">Resources</span>
72
+ <a class="link link-hover" href="https://qdrant.tech/" target="_blank">Qdrant</a>
73
+ <a class="link link-hover" href="https://fastapi.tiangolo.com/" target="_blank">FastAPI</a>
74
+ <a class="link link-hover" href="https://daisyui.com/" target="_blank">DaisyUI</a>
75
+ </div>
76
+ </footer>
77
+
78
+ <!-- Scripts -->
79
+ <script src="{{ url_for('static', path='/js/main.js') }}"></script>
80
+ {% block scripts %}{% endblock %}
81
+ </body>
82
+ </html>
app/templates/index.html ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% extends "base.html" %}
2
+
3
+ {% block content %}
4
+ <div class="max-w-4xl mx-auto">
5
+ <div class="card bg-base-100 shadow-xl">
6
+ <div class="card-body">
7
+ <h2 class="card-title">Process YouTube Video</h2>
8
+ <p class="text-gray-600 mb-4">Enter a YouTube URL to process its transcript for searching</p>
9
+
10
+ <div class="form-control">
11
+ <label class="label">
12
+ <span class="label-text">Enter YouTube URL</span>
13
+ </label>
14
+ <div class="join w-full">
15
+ <input type="text" id="youtube-url" placeholder="https://www.youtube.com/watch?v=..." class="input input-bordered join-item w-full" />
16
+ <button id="process-button" class="btn btn-primary join-item">Process</button>
17
+ </div>
18
+ </div>
19
+
20
+ <div class="mt-4" id="process-status">
21
+ <!-- Processing status messages will appear here -->
22
+ </div>
23
+
24
+ <div class="divider">OR</div>
25
+
26
+ <h3 class="font-bold mb-2">Example Videos</h3>
27
+ <div class="grid grid-cols-1 md:grid-cols-3 gap-2">
28
+ <button class="btn btn-outline btn-accent btn-sm example-video w-full h-auto" data-url="https://www.youtube.com/watch?v=zjkBMFhNj_g">
29
+ <div class="flex items-center w-full">
30
+ <svg xmlns="http://www.w3.org/2000/svg" class="h-4 w-4 mr-2 flex-shrink-0" fill="none" viewBox="0 0 24 24" stroke="currentColor">
31
+ <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M14.752 11.168l-3.197-2.132A1 1 0 0010 9.87v4.263a1 1 0 001.555.832l3.197-2.132a1 1 0 000-1.664z" />
32
+ <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
33
+ </svg>
34
+ <span class="truncate text-left">Intro to Large Language Models</span>
35
+ </div>
36
+ </button>
37
+ <button class="btn btn-outline btn-accent btn-sm example-video w-full h-auto" data-url="https://www.youtube.com/watch?v=7xTGNNLPyMI">
38
+ <div class="flex items-center w-full">
39
+ <svg xmlns="http://www.w3.org/2000/svg" class="h-4 w-4 mr-2 flex-shrink-0" fill="none" viewBox="0 0 24 24" stroke="currentColor">
40
+ <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M14.752 11.168l-3.197-2.132A1 1 0 0010 9.87v4.263a1 1 0 001.555.832l3.197-2.132a1 1 0 000-1.664z" />
41
+ <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
42
+ </svg>
43
+ <span class="truncate text-left">Deep Dive into LLMs like ChatGPT</span>
44
+ </div>
45
+ </button>
46
+ <button class="btn btn-outline btn-accent btn-sm example-video w-full h-auto" data-url="https://www.youtube.com/watch?v=EWvNQjAaOHw">
47
+ <div class="flex items-center w-full">
48
+ <svg xmlns="http://www.w3.org/2000/svg" class="h-4 w-4 mr-2 flex-shrink-0" fill="none" viewBox="0 0 24 24" stroke="currentColor">
49
+ <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M14.752 11.168l-3.197-2.132A1 1 0 0010 9.87v4.263a1 1 0 001.555.832l3.197-2.132a1 1 0 000-1.664z" />
50
+ <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
51
+ </svg>
52
+ <span class="truncate text-left">How I use LLMs</span>
53
+ </div>
54
+ </button>
55
+ </div>
56
+ </div>
57
+ </div>
58
+
59
+ <div class="card bg-base-100 shadow-xl mt-6 hidden" id="recently-processed">
60
+ <div class="card-body">
61
+ <h2 class="card-title">Recently Processed Videos</h2>
62
+ <div class="mt-4">
63
+ <!-- Video carousel with navigation arrows -->
64
+ <div class="flex items-center gap-2">
65
+ <!-- Left arrow navigation -->
66
+ <div class="hidden md:block" id="carousel-prev">
67
+ <button class="btn btn-circle btn-primary btn-disabled">
68
+ <svg xmlns="http://www.w3.org/2000/svg" class="h-6 w-6" fill="none" viewBox="0 0 24 24" stroke="currentColor">
69
+ <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M15 19l-7-7 7-7" />
70
+ </svg>
71
+ </button>
72
+ </div>
73
+
74
+ <!-- Carousel content -->
75
+ <div class="carousel carousel-center rounded-box w-full p-2 overflow-x-auto">
76
+ <div id="video-list" class="flex space-x-4 items-stretch">
77
+ <!-- Video cards will be populated here as carousel items -->
78
+ </div>
79
+ </div>
80
+
81
+ <!-- Right arrow navigation -->
82
+ <div class="hidden md:block" id="carousel-next">
83
+ <button class="btn btn-circle btn-primary">
84
+ <svg xmlns="http://www.w3.org/2000/svg" class="h-6 w-6" fill="none" viewBox="0 0 24 24" stroke="currentColor">
85
+ <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 5l7 7-7 7" />
86
+ </svg>
87
+ </button>
88
+ </div>
89
+ </div>
90
+ </div>
91
+ </div>
92
+ </div>
93
+ </div>
94
+ {% endblock %}
95
+
96
+ {% block scripts %}
97
+ <script src="{{ url_for('static', path='/js/index.js') }}"></script>
98
+ {% endblock %}
app/templates/video.html ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% extends "base.html" %}
2
+
3
+ {% block content %}
4
+ <div class="grid grid-cols-1 lg:grid-cols-2 gap-6">
5
+ <div class="lg:col-span-1">
6
+ <div class="card bg-base-100 shadow-xl">
7
+ <div class="card-body p-4">
8
+ <div class="aspect-video">
9
+ <iframe id="youtube-player" class="w-full h-full"
10
+ src="https://www.youtube.com/embed/{{ video_id }}?enablejsapi=1"
11
+ frameborder="0"
12
+ allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture"
13
+ allowfullscreen>
14
+ </iframe>
15
+ </div>
16
+ </div>
17
+ </div>
18
+ </div>
19
+
20
+ <div class="lg:col-span-1">
21
+ <div class="card bg-base-100 shadow-xl sticky top-4">
22
+ <div class="card-body">
23
+ <div class="flex justify-between items-center">
24
+ <h2 class="card-title">Video Transcript</h2>
25
+ </div>
26
+
27
+ <div class="form-control mb-4">
28
+ <label class="label">
29
+ <span class="label-text">Search in transcript</span>
30
+ </label>
31
+ <div class="join w-full">
32
+ <input type="text" id="search-input" placeholder="Search in this video..." class="input input-bordered join-item w-full" />
33
+ <button id="search-button" class="btn btn-primary join-item">
34
+ <svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5" fill="none" viewBox="0 0 24 24" stroke="currentColor">
35
+ <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M21 21l-6-6m2-5a7 7 0 11-14 0 7 7 0 0114 0z" />
36
+ </svg>
37
+ Search
38
+ </button>
39
+ </div>
40
+ </div>
41
+
42
+ <div id="loading" class="hidden mt-2 mb-2">
43
+ <span class="loading loading-spinner loading-md"></span>
44
+ <span class="ml-2">Searching...</span>
45
+ </div>
46
+
47
+ <div id="transcript-container" class="mt-2 transcript-container">
48
+ <!-- Transcript will be loaded here -->
49
+ </div>
50
+ </div>
51
+ </div>
52
+ </div>
53
+ </div>
54
+ {% endblock %}
55
+
56
+ {% block scripts %}
57
+ <script>
58
+ // Store the video ID in a JavaScript variable
59
+ const videoId = "{{ video_id }}";
60
+ </script>
61
+ <script src="{{ url_for('static', path='/js/video.js') }}"></script>
62
+ {% endblock %}
docker-compose.yml ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: '3.8'
2
+
3
+ services:
4
+ app:
5
+ build: .
6
+ ports:
7
+ - "8000:8000"
8
+ environment:
9
+ - QDRANT_URL=http://qdrant:6333
10
+ - WORKERS=4 # Set number of workers
11
+ # - QDRANT_API_KEY=your_api_key_here (uncomment and set if needed)
12
+ depends_on:
13
+ - qdrant
14
+ restart: unless-stopped
15
+ healthcheck:
16
+ test: ["CMD", "curl", "-f", "http://localhost:8000/"]
17
+ interval: 30s
18
+ timeout: 10s
19
+ retries: 3
20
+ start_period: 40s
21
+
22
+ qdrant:
23
+ image: qdrant/qdrant:v1.13.5
24
+ volumes:
25
+ - qdrant_data:/qdrant/storage
26
+
27
+ volumes:
28
+ qdrant_data:
example.env ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Qdrant Configuration
2
+ QDRANT_URL=http://localhost:6333
3
+ QDRANT_API_KEY=
gunicorn.conf.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import multiprocessing
3
+
4
+ # Get the number of workers from environment variable or calculate based on CPU cores
5
+ workers_env = os.getenv("WORKERS")
6
+ if workers_env:
7
+ workers = int(workers_env)
8
+ else:
9
+ # Use the recommended formula: (2 * CPU cores) + 1
10
+ workers = (2 * multiprocessing.cpu_count()) + 1
11
+
12
+ # Use Uvicorn worker class for ASGI support
13
+ worker_class = "uvicorn.workers.UvicornWorker"
14
+
15
+ # Bind to 0.0.0.0:8000
16
+ bind = "0.0.0.0:8000"
17
+
18
+ # Logging
19
+ accesslog = "-" # Log to stdout
20
+ errorlog = "-" # Log to stderr
21
+ loglevel = "info"
22
+
23
+ # Timeout configuration
24
+ timeout = 120 # 2 minutes
25
+ graceful_timeout = 30
26
+
27
+ # Worker settings
28
+ worker_connections = 1000 # Maximum number of connections each worker can handle
29
+ keepalive = 5 # Seconds to wait between client requests before closing connection
30
+
31
+ # For better performance with Uvicorn
32
+ proc_name = "vibe-coding-rag"
poetry.lock CHANGED
The diff for this file is too large to render. See raw diff
 
pyproject.toml CHANGED
@@ -7,16 +7,27 @@ readme = "README.md"
7
  package-mode = false
8
 
9
  [tool.poetry.dependencies]
10
- python = "^3.10"
11
  torch = {version = "^2.6.0+cpu", source = "pytorch-cpu"}
12
  sentence-transformers = "^3.4.1"
13
  qdrant-client = "^1.13.3"
 
 
 
 
 
 
 
14
 
15
  [[tool.poetry.source]]
16
  name = "pytorch-cpu"
17
  url = "https://download.pytorch.org/whl/cpu"
18
  priority = "explicit"
19
 
 
 
 
 
20
  [build-system]
21
  requires = ["poetry-core"]
22
  build-backend = "poetry.core.masonry.api"
 
7
  package-mode = false
8
 
9
  [tool.poetry.dependencies]
10
+ python = "^3.10,<3.14"
11
  torch = {version = "^2.6.0+cpu", source = "pytorch-cpu"}
12
  sentence-transformers = "^3.4.1"
13
  qdrant-client = "^1.13.3"
14
+ fastapi = "^0.115.11"
15
+ uvicorn = "^0.34.0"
16
+ gunicorn = "^21.2.0"
17
+ jinja2 = "^3.1.6"
18
+ youtube-transcript-api = "^1.0.2"
19
+ pytube = "^15.0.0"
20
+ yt-dlp = "^2025.2.19"
21
 
22
  [[tool.poetry.source]]
23
  name = "pytorch-cpu"
24
  url = "https://download.pytorch.org/whl/cpu"
25
  priority = "explicit"
26
 
27
+
28
+ [tool.poetry.group.dev.dependencies]
29
+ ruff = "^0.11.0"
30
+
31
  [build-system]
32
  requires = ["poetry-core"]
33
  build-backend = "poetry.core.masonry.api"