Spaces:

Qdrant
/

webinar-vibe-coding-rag

Running

App Files Files Community

lukawskikacper committed on Mar 21

Commit

74cf6bd

1 Parent(s): 958cc77

Vibe coded implementation (with some manual fixes)

Browse files

(cherry picked from commit 255acda8c8bcb989fd72006b84dee18553468356)

Files changed (25) hide show

.dockerignore +49 -0
CLAUDE.md +33 -0
Dockerfile +41 -0
app/__init__.py +1 -0
app/api/__init__.py +1 -0
app/api/router.py +6 -0
app/api/video.py +143 -0
app/main.py +63 -0
app/models/__init__.py +1 -0
app/models/video.py +32 -0
app/services/__init__.py +1 -0
app/services/qdrant_service.py +41 -0
app/services/video_service.py +647 -0
app/static/css/style.css +137 -0
app/static/js/index.js +268 -0
app/static/js/main.js +139 -0
app/static/js/video.js +440 -0
app/templates/base.html +82 -0
app/templates/index.html +98 -0
app/templates/video.html +62 -0
docker-compose.yml +28 -0
example.env +3 -0
gunicorn.conf.py +32 -0
poetry.lock +0 -0
pyproject.toml +12 -1

.dockerignore ADDED Viewed

	@@ -0,0 +1,49 @@

+# Git
+.git
+.gitignore
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+# Virtual environment
+venv/
+.env
+.venv/
+ENV/
+# Docker
+.dockerignore
+Dockerfile
+docker-compose.yml
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+# Misc
+.DS_Store
+.pytest_cache/
+htmlcov/
+.coverage
+.tox/

CLAUDE.md ADDED Viewed

	@@ -0,0 +1,33 @@

+# Development Guidelines for Vibe Coding RAG
+## Commands
+- Build/Install: `poetry install`
+- Run: `poetry run python -m app.main` (once app is created)
+- Lint: `poetry run ruff check .`
+- Format: `poetry run ruff format .`
+- Test: `poetry run pytest`
+- Run single test: `poetry run pytest path/to/test.py::test_function_name -v`
+## Code Style
+- **Imports**: Group standard library, third-party, and local imports
+- **Formatting**: Use Black/Ruff compatible formatting
+- **Types**: Use type annotations for function parameters and return values
+- **Naming**:
+  - Variables/functions: snake_case
+  - Classes: PascalCase
+  - Constants: UPPER_SNAKE_CASE
+- **Error Handling**: Use try/except with specific exceptions
+- **Documentation**: Docstrings for all public functions and classes
+## Technologies
+- Vector DB: Qdrant
+- Embeddings: SentenceTransformers with sentence-transformers/static-retrieval-mrl-en-v1
+- API: FastAPI (when implemented)
+- Frontend: HTML/CSS/JavaScript with DaisyUI components
+## MCP Integration
+- Always call qdrant-code-search find tool when you are about to generate frontend code (HTML/CSS/JS)
+- Store generated code snippets in qdrant-code-search store tool for future reference
+## Qdrant
+- Point IDs have to be string-like UUIDs

Dockerfile ADDED Viewed

	@@ -0,0 +1,41 @@

+FROM python:3.10-slim
+# Install system dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    curl \
+    && rm -rf /var/lib/apt/lists/*
+WORKDIR /app
+# Install Poetry (pinned version; skip pip's download cache to keep the layer small)
+RUN pip install --no-cache-dir poetry==1.8.3
+# Copy poetry configuration files
+COPY pyproject.toml poetry.lock poetry.toml* ./
+# Configure poetry to not create a virtual environment
+RUN poetry config virtualenvs.create false
+# Install runtime dependencies only.
+# `--only main` replaces the deprecated `--no-dev`; `--no-root` is required because
+# the application package has not been copied into the image at this point.
+RUN poetry install --only main --no-root --no-interaction --no-ansi
+# Copy application code
+COPY app ./app
+# Expose port
+EXPOSE 8000
+# Set environment variables
+ENV PYTHONPATH=/app
+ENV QDRANT_URL=http://localhost:6333
+# ENV QDRANT_API_KEY=your_api_key_here (uncomment and set if needed)
+# Default number of workers (fixed value, not computed); override at run time.
+# A common rule of thumb is (2 * CPU cores) + 1.
+# NOTE(review): presumably read by gunicorn.conf.py - confirm.
+ENV WORKERS=4
+# Copy the Gunicorn config file
+COPY gunicorn.conf.py ./
+# Command to run the application with Gunicorn and Uvicorn workers
+CMD ["gunicorn", "app.main:app", "-c", "gunicorn.conf.py"]

app/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ # Initialize app package

app/api/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ # Initialize API package

app/api/router.py ADDED Viewed

	@@ -0,0 +1,6 @@

+from fastapi import APIRouter
+from app.api import video
+# Aggregating router for the API; app.main mounts it under the "/api" prefix
+router = APIRouter()
+# Video endpoints are exposed under "/video" and tagged "video"
+router.include_router(video.router, prefix="/video", tags=["video"])

app/api/video.py ADDED Viewed

	@@ -0,0 +1,143 @@

+from fastapi import APIRouter, HTTPException, Query
+from typing import List, Optional
+from app.models.video import Video, SearchResult, VideoSegment
+from app.services.video_service import (
+    process_video,
+    search_video_segments,
+    get_all_segments,
+    get_processed_videos,
+    get_video_by_id,
+)
+from pydantic import BaseModel
+router = APIRouter()
+class VideoRequest(BaseModel):
+    """Request body for the video processing endpoint."""
+    url: str  # handed to extract_video_id, which accepts a YouTube URL or a bare video ID
+class VideoResponse(BaseModel):
+    """Response model for video processing with additional status information."""
+    video: Video
+    # True when this request ran process_video; False when stored data was returned as-is
+    newly_processed: bool = False
+@router.post("/process", response_model=VideoResponse)
+async def process_video_endpoint(video_request: VideoRequest) -> VideoResponse:
+    """Process a YouTube video to extract and store transcript segments.
+    If the video has already been processed, returns the existing data without reprocessing."""
+    try:
+        import logging
+        # Get the video ID first
+        from app.services.video_service import extract_video_id, get_video_by_id
+        video_id = extract_video_id(video_request.url)
+        # Check if already processed
+        existing_video = get_video_by_id(video_id)
+        already_processed = existing_video is not None and existing_video.processed
+        if already_processed:
+            logging.info(f"Video {video_id} already processed, returning existing data")
+            return VideoResponse(video=existing_video, newly_processed=False)
+        # Process the video if needed
+        result = process_video(video_request.url)
+        return VideoResponse(video=result, newly_processed=True)
+    except Exception as e:
+        import logging
+        import traceback
+        logging.error(f"Error processing video URL {video_request.url}: {str(e)}")
+        logging.error(traceback.format_exc())
+        raise HTTPException(status_code=500, detail=str(e))
+@router.get("/search")
+async def search_video_endpoint(
+    query: str = Query(..., description="Search query for video content"),
+    video_id: Optional[str] = Query(
+        None, description="Optional YouTube video ID to limit search"
+    ),
+    limit: int = Query(5, description="Maximum number of results to return"),
+) -> List[SearchResult]:
+    """Search for video segments based on the provided query."""
+    import logging
+    # Check for invalid video_id
+    if video_id and (video_id.lower() == "undefined" or video_id.lower() == "null"):
+        logging.warning(f"Invalid video_id in search request: '{video_id}'")
+        video_id = None  # Clear invalid video_id to perform a global search instead
+    try:
+        results = search_video_segments(query, video_id, limit)
+        return results
+    except Exception as e:
+        logging.error(
+            f"Error searching for query '{query}' with video_id '{video_id}': {str(e)}"
+        )
+        raise HTTPException(status_code=500, detail=str(e))
+@router.get("/segments/{video_id}")
+async def get_segments_endpoint(video_id: str) -> List[VideoSegment]:
+    """Get all segments for a specific video, ordered by start time."""
+    import logging
+    # Check for invalid video ID
+    if not video_id or video_id.lower() == "undefined" or video_id.lower() == "null":
+        logging.warning(f"Invalid video ID requested: '{video_id}'")
+        return []  # Return empty list for invalid IDs to avoid frontend errors
+    try:
+        segments = get_all_segments(video_id)
+        if not segments:
+            # Return an empty list instead of 404 to allow frontend to handle gracefully
+            return []
+        return segments
+    except Exception as e:
+        # Log the exception for debugging
+        logging.error(f"Error getting segments for video {video_id}: {str(e)}")
+        raise HTTPException(
+            status_code=500, detail=f"Could not retrieve video segments: {str(e)}"
+        )
+@router.get("/recent")
+async def get_recent_videos_endpoint(
+    limit: int = Query(10, description="Maximum number of videos to return"),
+) -> List[Video]:
+    """Get recently processed videos ordered by creation time."""
+    try:
+        videos = get_processed_videos(limit=limit)
+        return videos
+    except Exception as e:
+        # Log the exception for debugging
+        import logging
+        logging.error(f"Error getting recent videos: {str(e)}")
+        raise HTTPException(
+            status_code=500, detail=f"Could not retrieve recent videos: {str(e)}"
+        )
+@router.get("/info/{video_id}")
+async def get_video_info_endpoint(video_id: str) -> Video:
+    """Get metadata for a specific video."""
+    try:
+        video = get_video_by_id(video_id)
+        if not video:
+            # Return a basic video object if not found in database
+            return Video(video_id=video_id, title=f"Video {video_id}")
+        return video
+    except Exception as e:
+        import logging
+        logging.error(f"Error getting video info for {video_id}: {str(e)}")
+        raise HTTPException(
+            status_code=500, detail=f"Could not retrieve video info: {str(e)}"
+        )

app/main.py ADDED Viewed

	@@ -0,0 +1,63 @@

+from fastapi import FastAPI, Request
+from fastapi.staticfiles import StaticFiles
+from fastapi.templating import Jinja2Templates
+from fastapi.responses import HTMLResponse, RedirectResponse
+from fastapi.middleware.cors import CORSMiddleware
+from app.api import router as api_router
+from app.services.video_service import get_video_by_id
+app = FastAPI(title="In-Video Search", docs_url=None, redoc_url=None, openapi_url=None)
+# Enable CORS
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # Adjust this in production
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Mount static files
+app.mount("/static", StaticFiles(directory="app/static"), name="static")
+# Templates
+templates = Jinja2Templates(directory="app/templates")
+@app.get("/", response_class=HTMLResponse)
+async def index(request: Request):
+    return templates.TemplateResponse(
+        "index.html", {"request": request, "title": "In-Video Search"}
+    )
+@app.get("/video/{video_id}", response_class=HTMLResponse)
+async def video_page(request: Request, video_id: str):
+    # Try to get video info from database
+    video = get_video_by_id(video_id)
+    title = "Video Player"
+    # If video exists and has a title, use it
+    if video and video.title:
+        title = video.title
+    return templates.TemplateResponse(
+        "video.html",
+        {"request": request, "title": title, "video_id": video_id},
+    )
+@app.get("/watch")
+async def watch_redirect(request: Request, v: str):
+    # Redirect YouTube-style URLs to our video page
+    return RedirectResponse(url=f"/video/{v}")
+# Include API routers
+app.include_router(api_router.router, prefix="/api")
+if __name__ == "__main__":
+    # Direct-run entry point: serves on all interfaces, port 8000, with auto-reload enabled
+    import uvicorn
+    uvicorn.run("app.main:app", host="0.0.0.0", port=8000, reload=True)

app/models/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ # Initialize models package

app/models/video.py ADDED Viewed

	@@ -0,0 +1,32 @@

+from pydantic import BaseModel, Field
+from typing import Optional
+class VideoSegment(BaseModel):
+    """Model for a video segment with transcript."""
+    text: str = Field(..., description="Transcript text of the segment")
+    start: float = Field(..., description="Start time in seconds")
+    end: float = Field(..., description="End time in seconds")
+    # process_video in app/services/video_service.py builds this as "<video_id>_<index>"
+    segment_id: str = Field(..., description="Unique identifier for the segment")
+    video_id: str = Field(..., description="YouTube video ID this segment belongs to")
+class Video(BaseModel):
+    """Model for a YouTube video with metadata."""
+    video_id: str = Field(..., description="YouTube video ID")
+    title: Optional[str] = Field(None, description="Video title")
+    description: Optional[str] = Field(None, description="Video description")
+    channel: Optional[str] = Field(None, description="Channel name")
+    # Stays False until process_video has stored the segments and sets it to True
+    processed: bool = Field(False, description="Whether the video has been processed")
+    # Optional: video objects built without a timestamp leave this as None
+    created_at: Optional[int] = Field(
+        None, description="Unix timestamp (seconds since epoch) when the video was processed"
+    )
+class SearchResult(BaseModel):
+    """Model for a video segment search result."""
+    # NOTE(review): presumably the score returned by Qdrant (collections use cosine distance) - confirm
+    score: float = Field(..., description="Similarity score")
+    segment: VideoSegment = Field(..., description="The matching video segment")

app/services/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ # Initialize services package

app/services/qdrant_service.py ADDED Viewed

	@@ -0,0 +1,41 @@

+import os
+from qdrant_client import QdrantClient
+import logging
+def get_qdrant_client() -> QdrantClient:
+    """
+    Initialize a Qdrant client using environment variables or default to localhost.
+    Environment variables:
+    - QDRANT_URL: URL for Qdrant server (default: http://localhost:6333)
+    - QDRANT_API_KEY: Optional API key for authentication
+    Returns:
+        QdrantClient: Configured Qdrant client
+    """
+    # Get configuration from environment variables with defaults
+    url = os.getenv("QDRANT_URL", "http://localhost:6333")
+    api_key = os.getenv("QDRANT_API_KEY")
+    # Configure client with or without API key
+    if api_key:
+        client = QdrantClient(location=url, api_key=api_key)
+        logging.info(f"Connecting to Qdrant at {url} with API key")
+    else:
+        client = QdrantClient(location=url)
+        logging.info(f"Connecting to Qdrant at {url}")
+    # Test connection
+    try:
+        client.get_collections()
+        logging.info(f"Successfully connected to Qdrant at {url}")
+    except Exception as e:
+        logging.error(f"Failed to connect to Qdrant at {url}: {e}")
+        # Connection will be tested again when used
+    return client
+# Initialize global client instance
+# Runs at import time: importing this module reads the environment and performs the
+# connection probe inside get_qdrant_client (a failed probe is logged, not raised).
+qdrant_client = get_qdrant_client()

app/services/video_service.py ADDED Viewed

	@@ -0,0 +1,647 @@

+import uuid
+from typing import List, Dict, Any, Optional
+import re
+from datetime import datetime
+from sentence_transformers import SentenceTransformer
+from qdrant_client.http import models
+from youtube_transcript_api import YouTubeTranscriptApi
+import yt_dlp
+from app.models.video import VideoSegment, Video, SearchResult
+from app.services.qdrant_service import qdrant_client
+# Initialize the sentence transformer model
+# Created at import time and shared by every function in this module
+model = SentenceTransformer("sentence-transformers/static-retrieval-mrl-en-v1")
+# Collection names
+COLLECTION_NAME = "video_segments"  # one point per transcript segment
+PROCESSED_VIDEOS_COLLECTION = "processed_videos"  # one point per processed video
+def _fetch_youtube_metadata(video_id: str, video: Optional[Video] = None) -> Video:
+    """Helper function to fetch video metadata from YouTube using yt-dlp."""
+    import logging
+    if not video:
+        video = Video(video_id=video_id)
+    try:
+        logging.info(f"Fetching metadata for video {video_id} from YouTube")
+        # Configure yt-dlp options
+        ydl_opts = {
+            "skip_download": True,  # Don't download the video
+            "quiet": True,  # Don't print progress
+            "no_warnings": True,  # Don't print warnings
+            "extract_flat": True,  # Don't extract videos in playlists
+            "format": "best",  # Best quality (doesn't matter since we're not downloading)
+        }
+        # Use yt-dlp to extract video info
+        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+            info = ydl.extract_info(
+                f"https://www.youtube.com/watch?v={video_id}", download=False
+            )
+            # Set video properties if available
+            if info.get("title"):
+                video.title = info.get("title")
+            if info.get("description"):
+                video.description = info.get("description")
+            if info.get("uploader"):
+                video.channel = info.get("uploader")
+        logging.info(
+            f"Successfully retrieved video metadata: title='{video.title}', channel='{video.channel}'"
+        )
+    except Exception as meta_error:
+        logging.warning(f"Could not fetch metadata from YouTube: {str(meta_error)}")
+        if not video.title:
+            video.title = f"Video {video_id}"
+    return video
+# Ensure collections exist
+def ensure_collection_exists():
+    """Ensure the required collections exist in Qdrant."""
+    import logging
+    try:
+        logging.info("Checking Qdrant collections")
+        collections = qdrant_client.get_collections().collections
+        collection_names = [collection.name for collection in collections]
+        logging.info(f"Existing collections: {collection_names}")
+        # Create video segments collection if it doesn't exist
+        if COLLECTION_NAME not in collection_names:
+            logging.info(f"Creating collection: {COLLECTION_NAME}")
+            vector_size = model.get_sentence_embedding_dimension()
+            qdrant_client.create_collection(
+                collection_name=COLLECTION_NAME,
+                vectors_config=models.VectorParams(
+                    size=vector_size,
+                    distance=models.Distance.COSINE,
+                ),
+            )
+            logging.info(
+                f"Successfully created {COLLECTION_NAME} collection with vector size {vector_size}"
+            )
+        # Create processed videos collection if it doesn't exist
+        if PROCESSED_VIDEOS_COLLECTION not in collection_names:
+            logging.info(f"Creating collection: {PROCESSED_VIDEOS_COLLECTION}")
+            vector_size = model.get_sentence_embedding_dimension()
+            qdrant_client.create_collection(
+                collection_name=PROCESSED_VIDEOS_COLLECTION,
+                vectors_config=models.VectorParams(
+                    size=vector_size,
+                    distance=models.Distance.COSINE,
+                ),
+            )
+            qdrant_client.create_payload_index(
+                collection_name=PROCESSED_VIDEOS_COLLECTION,
+                field_name="video_id",
+                field_schema=models.PayloadSchemaType.KEYWORD,
+            )
+            qdrant_client.create_payload_index(
+                collection_name=PROCESSED_VIDEOS_COLLECTION,
+                field_name="created_at",
+                field_schema=models.IntegerIndexParams(
+                    type=models.IntegerIndexType.INTEGER,
+                    range=True,
+                ),
+            )
+            logging.info(
+                f"Successfully created {PROCESSED_VIDEOS_COLLECTION} collection with vector size {vector_size}"
+            )
+    except Exception as e:
+        import traceback
+        logging.error(f"Error ensuring collections exist: {str(e)}")
+        logging.error(traceback.format_exc())
+        raise
+def get_embeddings(text: str) -> List[float]:
+    """Get embeddings for the given text using SentenceTransformer."""
+    return model.encode(text).tolist()
+def extract_video_id(youtube_url: str) -> str:
+    """Extract YouTube video ID from URL."""
+    import logging
+    logging.info(f"Extracting video ID from URL: {youtube_url}")
+    # Match patterns like: https://www.youtube.com/watch?v=VIDEO_ID or https://youtu.be/VIDEO_ID
+    patterns = [
+        r"(?:youtube\.com/watch\?v=|youtu\.be/)([\w-]+)",
+        r"(?:youtube\.com/embed/)([\w-]+)",
+        r"(?:youtube\.com/v/)([\w-]+)",
+    ]
+    for pattern in patterns:
+        match = re.search(pattern, youtube_url)
+        if match:
+            video_id = match.group(1)
+            logging.info(f"Extracted video ID: {video_id}")
+            return video_id
+    # If no pattern matches, assume the input might be a direct video ID
+    if re.match(r"^[\w-]+$", youtube_url):
+        logging.info(f"Using direct video ID: {youtube_url}")
+        return youtube_url
+    logging.error(f"Failed to extract video ID from URL: {youtube_url}")
+    raise ValueError(f"Could not extract video ID from URL: {youtube_url}")
+def get_video_transcript(video_id: str) -> List[Dict[str, Any]]:
+    """
+    Get transcript for a YouTube video in any available language.
+    Will try to get transcripts in this priority:
+    1. English transcript (if available)
+    2. Any available transcript translated to English (if translatable)
+    3. Any available transcript in its original language
+    """
+    import logging
+    import traceback
+    try:
+        # Try to get available transcript languages
+        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
+        # First, look for English transcript
+        english_transcript = None
+        other_transcripts = []
+        # Categorize available transcripts
+        for transcript_item in transcript_list:
+            if transcript_item.language_code == "en":
+                english_transcript = transcript_item
+            else:
+                other_transcripts.append(transcript_item)
+        # 1. Try English first if available
+        if english_transcript:
+            try:
+                logging.info("Found English transcript, using it directly")
+                return english_transcript.fetch()
+            except Exception as e:
+                logging.warning(f"Failed to fetch English transcript: {str(e)}")
+        # 2. Try translatable transcripts
+        translatable_transcripts = [t for t in other_transcripts if t.is_translatable]
+        for transcript_item in translatable_transcripts:
+            try:
+                logging.info(
+                    f"Trying to translate {transcript_item.language_code} transcript to English"
+                )
+                translated = transcript_item.translate("en").fetch()
+                logging.info(
+                    f"Successfully translated {transcript_item.language_code} transcript to English"
+                )
+                return translated
+            except Exception as e:
+                logging.warning(
+                    f"Failed to translate {transcript_item.language_code} transcript: {str(e)}"
+                )
+        # 3. Try any transcript in original language
+        for transcript_item in other_transcripts:
+            try:
+                logging.info(
+                    f"Using non-translated {transcript_item.language_code} transcript"
+                )
+                return transcript_item.fetch()
+            except Exception as e:
+                logging.warning(
+                    f"Failed to fetch {transcript_item.language_code} transcript: {str(e)}"
+                )
+        # If we get here, no transcripts worked
+        available_langs = [t.language_code for t in transcript_list]
+        raise ValueError(
+            f"No usable transcripts found for video {video_id}. Available languages: {available_langs}"
+        )
+    except Exception as e:
+        logging.error(f"Transcript API error for video {video_id}: {str(e)}")
+        logging.error(traceback.format_exc())
+        raise ValueError(f"Could not get transcript for video {video_id}: {str(e)}")
+def store_processed_video(video: Video) -> bool:
+    """Store a processed video in Qdrant."""
+    try:
+        # Get a simple embedding for the video ID
+        vector = get_embeddings(f"video_{video.video_id}")
+        # Prepare payload
+        payload = video.model_dump()
+        # Store in Qdrant
+        qdrant_client.upsert(
+            collection_name=PROCESSED_VIDEOS_COLLECTION,
+            points=[
+                models.PointStruct(
+                    id=uuid.uuid4().hex,
+                    vector=vector,
+                    payload=payload,
+                ),
+            ],
+        )
+        return True
+    except Exception as e:
+        print(f"Error storing processed video: {e}")
+        return False
+def get_processed_videos(limit: int = 10) -> List[Video]:
+    """Get recently processed videos ordered by creation time."""
+    try:
+        # Scroll through the processed videos collection
+        scroll_result = qdrant_client.scroll(
+            collection_name=PROCESSED_VIDEOS_COLLECTION,
+            limit=limit,
+            with_payload=True,
+            order_by=models.OrderBy(key="created_at", direction=models.Direction.DESC),
+        )
+        # Extract videos from the result
+        videos = []
+        for point in scroll_result[0]:
+            # Convert payload to Video
+            video = Video(**point.payload)
+            videos.append(video)
+        # Sort by created_at timestamp (most recent first)
+        videos.sort(key=lambda x: x.created_at or "", reverse=True)
+        return videos[:limit]
+    except Exception as e:
+        print(f"Error getting processed videos: {e}")
+        return []
+def process_video(youtube_url: str) -> Video:
+    """Process a YouTube video to extract and store transcript segments."""
+    import logging
+    import traceback
+    logging.info(f"Processing video URL: {youtube_url}")
+    transcript = None
+    video_id = None
+    # Extract video ID and get transcript
+    try:
+        # Extract video ID
+        video_id = extract_video_id(youtube_url)
+        logging.info(f"Successfully extracted video ID: {video_id}")
+        # Check if video has already been processed
+        existing_video = get_video_by_id(video_id)
+        if existing_video and existing_video.processed:
+            logging.info(
+                f"Video {video_id} has already been processed. Skipping processing."
+            )
+            return existing_video
+        # Create basic video object with current timestamp
+        current_time = int(datetime.utcnow().timestamp())
+        video = Video(video_id=video_id, created_at=current_time)
+        # Get video metadata from YouTube using the helper function
+        try:
+            video = _fetch_youtube_metadata(video_id, video)
+        except Exception as meta_error:
+            logging.warning(
+                f"Error fetching YouTube metadata during processing: {str(meta_error)}"
+            )
+            # Continue with processing even if metadata fetch fails
+        # Get transcript
+        logging.info(f"Fetching transcript for video ID: {video_id}")
+        transcript = get_video_transcript(video_id)
+        logging.info(
+            f"Successfully retrieved transcript with {len(transcript)} entries"
+        )
+        # If we couldn't get metadata and have a transcript, try to extract a title from transcript
+        if (
+            (not video.title or video.title == f"Video {video_id}")
+            and transcript
+            and len(transcript) > 0
+        ):
+            # Handle different transcript formats
+            try:
+                # Check if transcript is a list of dictionaries (original format)
+                if isinstance(transcript[0], dict) and "text" in transcript[0]:
+                    video.title = f"{transcript[0]['text'][:30]}..."
+                # Check if transcript is a list of objects with text attribute
+                elif hasattr(transcript[0], "text"):
+                    video.title = f"{transcript[0].text[:30]}..."
+                # If it's another format, just use the string representation of first item
+                else:
+                    first_item_str = str(transcript[0])[:30]
+                    video.title = f"{first_item_str}..."
+                logging.info(f"Set video title from transcript: {video.title}")
+            except Exception as title_error:
+                logging.warning(
+                    f"Could not set title from transcript: {str(title_error)}"
+                )
+    except Exception as e:
+        logging.error(f"Error in initial video processing: {str(e)}")
+        logging.error(traceback.format_exc())
+        raise
+    # Process transcript into segments
+    try:
+        # Process transcript into overlapping 30-second segments with 10-second overlap
+        logging.info(f"Processing {len(transcript)} transcript entries into segments")
+        segments = []
+        # First, normalize the transcript to a standard format
+        normalized_transcript = []
+        for item in transcript:
+            if (
+                isinstance(item, dict)
+                and "text" in item
+                and "start" in item
+                and "duration" in item
+            ):
+                # Original dictionary format
+                normalized_transcript.append(
+                    {
+                        "text": item["text"],
+                        "start": item["start"],
+                        "duration": item["duration"],
+                    }
+                )
+            elif (
+                hasattr(item, "text")
+                and hasattr(item, "start")
+                and hasattr(item, "duration")
+            ):
+                # Object with attributes
+                normalized_transcript.append(
+                    {"text": item.text, "start": item.start, "duration": item.duration}
+                )
+            else:
+                # Unknown format, try to extract what we can
+                logging.warning(
+                    f"Encountered unknown transcript item format: {type(item)}"
+                )
+                try:
+                    # Convert to string if we can't determine the structure
+                    text = str(item)
+                    # Use index as a timestamp approximation
+                    idx = transcript.index(item)
+                    normalized_transcript.append(
+                        {
+                            "text": text,
+                            "start": float(idx * 5),  # Approximate 5 seconds per item
+                            "duration": 5.0,
+                        }
+                    )
+                except Exception as e:
+                    logging.error(f"Failed to normalize transcript item: {str(e)}")
+                    continue
+        # Use the normalized transcript for segment processing
+        for i in range(len(normalized_transcript)):
+            # Find segments that form approximately 30 seconds
+            segment_text = []
+            start_time = normalized_transcript[i]["start"]
+            end_time = start_time
+            current_index = i
+            while (
+                current_index < len(normalized_transcript)
+                and end_time - start_time < 30
+            ):
+                segment_text.append(normalized_transcript[current_index]["text"])
+                end_time = (
+                    normalized_transcript[current_index]["start"]
+                    + normalized_transcript[current_index]["duration"]
+                )
+                current_index += 1
+            if segment_text:  # Only create segment if we have text
+                segment_id = f"{video_id}_{i}"
+                text = " ".join(segment_text)
+                # Create VideoSegment
+                segment = VideoSegment(
+                    text=text,
+                    start=start_time,
+                    end=end_time,
+                    segment_id=segment_id,
+                    video_id=video_id,
+                )
+                segments.append(segment)
+            # Skip forward with 10-second overlap (if we're not at the end)
+            if (
+                i + 1 < len(normalized_transcript)
+                and normalized_transcript[i + 1]["start"] < end_time - 10
+            ):
+                # Find the next segment that starts at least 20 seconds after our current start
+                while (
+                    i + 1 < len(normalized_transcript)
+                    and normalized_transcript[i + 1]["start"] < start_time + 20
+                ):
+                    i += 1
+        logging.info(f"Created {len(segments)} segments from transcript")
+        # Store segments in Qdrant
+        logging.info("Ensuring Qdrant collections exist")
+        ensure_collection_exists()
+        # Store each segment
+        logging.info(f"Storing {len(segments)} segments in Qdrant")
+        for segment in segments:
+            store_segment(segment)
+    except Exception as e:
+        logging.error(f"Error processing transcript segments: {str(e)}")
+        logging.error(traceback.format_exc())
+        raise
+    # Mark video as processed and store it
+    try:
+        logging.info(f"Marking video {video_id} as processed")
+        video.processed = True
+        # Store the processed video in Qdrant
+        logging.info("Storing processed video in Qdrant")
+        store_result = store_processed_video(video)
+        if store_result:
+            logging.info(f"Successfully stored processed video: {video_id}")
+        else:
+            logging.warning(f"Failed to store processed video in Qdrant: {video_id}")
+        return video
+    except Exception as e:
+        logging.error(f"Error storing processed video: {str(e)}")
+        logging.error(traceback.format_exc())
+        raise
+def store_segment(segment: VideoSegment) -> bool:
+    """Store a video segment in Qdrant."""
+    import logging
+    try:
+        # Get embeddings
+        logging.debug(f"Getting embeddings for segment {segment.segment_id}")
+        vector = get_embeddings(segment.text)
+        # Prepare payload
+        payload = segment.model_dump()
+        # Store in Qdrant
+        point_id = uuid.uuid4().hex
+        logging.debug(
+            f"Storing segment {segment.segment_id} in Qdrant with point ID {point_id}"
+        )
+        qdrant_client.upsert(
+            collection_name=COLLECTION_NAME,
+            points=[
+                models.PointStruct(
+                    id=point_id,
+                    vector=vector,
+                    payload=payload,
+                ),
+            ],
+        )
+        return True
+    except Exception as e:
+        import traceback
+        logging.error(f"Error storing segment {segment.segment_id}: {str(e)}")
+        logging.error(traceback.format_exc())
+        return False
+def search_video_segments(
+    query: str, video_id: Optional[str] = None, limit: int = 5
+) -> List[SearchResult]:
+    """Search for video segments based on the provided query."""
+    # Get query embeddings
+    query_vector = get_embeddings(query)
+    # Prepare filter if video_id is provided
+    filter_param = None
+    if video_id:
+        filter_param = models.Filter(
+            must=[
+                models.FieldCondition(
+                    key="video_id",
+                    match=models.MatchValue(value=video_id),
+                ),
+            ],
+        )
+    # Search in Qdrant
+    search_result = qdrant_client.search(
+        collection_name=COLLECTION_NAME,
+        query_vector=query_vector,
+        limit=limit,
+        query_filter=filter_param,
+    )
+    # Format results
+    results = []
+    for scored_point in search_result:
+        # Convert payload to VideoSegment
+        segment = VideoSegment(**scored_point.payload)
+        # Create SearchResult
+        result = SearchResult(
+            score=scored_point.score,
+            segment=segment,
+        )
+        results.append(result)
+    return results
+def get_all_segments(video_id: str) -> List[VideoSegment]:
+    """Get all segments for a specific video, ordered by start time."""
+    # Prepare filter for the video_id
+    filter_param = models.Filter(
+        must=[
+            models.FieldCondition(
+                key="video_id",
+                match=models.MatchValue(value=video_id),
+            ),
+        ],
+    )
+    # Search in Qdrant without vector, just to get all segments
+    scroll_result = qdrant_client.scroll(
+        collection_name=COLLECTION_NAME,
+        scroll_filter=filter_param,
+        limit=10000,  # Adjust based on expected maximum segments
+    )
+    # Format results
+    segments = []
+    for point in scroll_result[0]:
+        # Convert payload to VideoSegment
+        segment = VideoSegment(**point.payload)
+        segments.append(segment)
+    # Sort by start time
+    segments.sort(key=lambda x: x.start)
+    return segments
+def get_video_by_id(video_id: str) -> Optional[Video]:
+    """Get a specific video by its video_id. If not found in database, attempt to fetch from YouTube."""
+    import logging
+    try:
+        # Create filter for the video_id
+        filter_param = models.Filter(
+            must=[
+                models.FieldCondition(
+                    key="video_id",
+                    match=models.MatchValue(value=video_id),
+                ),
+            ],
+        )
+        # Search in the processed_videos collection
+        scroll_result = qdrant_client.scroll(
+            collection_name=PROCESSED_VIDEOS_COLLECTION,
+            scroll_filter=filter_param,
+            limit=1,  # We only need one result
+            with_payload=True,
+        )
+        # Check if any results were found
+        if scroll_result[0]:
+            # Convert payload to Video
+            video = Video(**scroll_result[0][0].payload)
+            # If video exists but doesn't have title, try to fetch it from YouTube
+            if not video.title or video.title == f"Video {video_id}":
+                video = _fetch_youtube_metadata(video_id, video)
+            return video
+        # If video not found in database, fetch basic metadata from YouTube
+        logging.info(f"Video {video_id} not found in database, fetching from YouTube")
+        video = Video(video_id=video_id)
+        return _fetch_youtube_metadata(video_id, video)
+    except Exception as e:
+        logging.error(f"Error getting video by ID {video_id}: {str(e)}")
+        # Return a basic video object with just the ID
+        return Video(video_id=video_id, title=f"Video {video_id}")

app/static/css/style.css ADDED Viewed

	@@ -0,0 +1,137 @@

+/* Custom styles */
+/* Video Carousel */
+/* Each carousel item snaps to the center of its scroll container. */
+.carousel-item {
+    scroll-snap-align: center;
+}
+/* Make carousel items responsive but maintain minimum width */
+/* Viewports up to 640px wide: items are at least 200px wide. */
+@media (max-width: 640px) {
+    .carousel-item {
+        min-width: 200px;
+    }
+}
+/* Viewports 640px and wider: items are at least 250px wide. At exactly 640px
+   both queries match and this later rule takes precedence. */
+@media (min-width: 640px) {
+    .carousel-item {
+        min-width: 250px;
+    }
+}
+/* Carousel container - don't let arrows overlap content */
+/* Horizontal overflow is clipped and the scrollbar is suppressed in every
+   engine. NOTE(review): presumably scrolling is driven from script
+   (scrollIntoView on the items) - confirm against the template. */
+.carousel {
+    overflow-x: hidden;
+    scrollbar-width: none; /* Hide scrollbar for Firefox */
+    -ms-overflow-style: none; /* Hide scrollbar for IE/Edge */
+}
+.carousel::-webkit-scrollbar {
+    display: none; /* Hide scrollbar for Chrome/Safari/Opera */
+}
+/* Navigation arrow styles */
+/* Disabled arrows are dimmed and show a "not allowed" cursor. */
+.btn-circle.btn-disabled {
+    opacity: 0.5;
+    cursor: not-allowed;
+}
+/* Video card styling - ensure proper structure */
+/* Cards are vertical flex columns that fill the height of their item. */
+.carousel-item .card {
+    display: flex;
+    flex-direction: column;
+    height: 100%;
+}
+/* The thumbnail figure spans the full width and keeps its natural height. */
+.carousel-item .card figure {
+    width: 100%;
+    flex: 0 0 auto;
+}
+/* The body may grow to take the remaining height but never shrinks. */
+.carousel-item .card .card-body {
+    flex: 1 0 auto;
+    display: flex;
+    flex-direction: column;
+}
+/* Transcript container */
+/* Capped at 500px tall; longer transcripts scroll vertically inside it. The
+   right padding leaves room between the text and the scrollbar edge. */
+.transcript-container {
+    max-height: 500px;
+    overflow-y: auto;
+    padding-right: 1rem;
+}
+/* Transcript segments */
+/* Clickable rows. The transparent border reserves 1px on every side. Colors
+   read CSS custom properties and fall back to the literal value when the
+   property is not defined. */
+.transcript-segment {
+    padding: 0.625rem;
+    margin-bottom: 0.5rem;
+    border-radius: 0.5rem;
+    cursor: pointer;
+    transition: all 0.2s ease;
+    border: 1px solid transparent;
+    background-color: var(--base-200, #f3f4f6);
+}
+/* Hover feedback: darker background, 1px lift and a faint shadow. */
+.transcript-segment:hover {
+    background-color: var(--base-300, #e5e7eb);
+    transform: translateY(-1px);
+    box-shadow: 0 2px 5px rgba(0, 0, 0, 0.05);
+}
+/* Highlighted segment: tinted background with an accent bar on the left. */
+.transcript-segment.highlight {
+    background-color: var(--primary-focus, rgba(59, 130, 246, 0.2));
+    border-left: 3px solid var(--primary, #3b82f6);
+    box-shadow: 0 2px 8px rgba(0, 0, 0, 0.08);
+}
+/* Segments carrying this class are removed from the layout entirely. */
+.transcript-segment.hidden-segment {
+    display: none;
+}
+/* Timestamp */
+/* Small bold pill shown inline; the 9999px radius yields fully rounded ends. */
+.timestamp {
+    display: inline-block;
+    background-color: var(--neutral, #e5e7eb);
+    padding: 0.125rem 0.5rem;
+    border-radius: 9999px;
+    font-size: 0.75rem;
+    font-weight: bold;
+    color: var(--neutral-content, #4b5563);
+    margin-right: 0.5rem;
+}
+/* Score badge */
+/* Same pill shape as the timestamp, in the primary color, spaced on the left. */
+.score-badge {
+    display: inline-block;
+    background-color: var(--primary, #3b82f6);
+    color: var(--primary-content, white);
+    border-radius: 9999px;
+    padding: 0.125rem 0.5rem;
+    font-size: 0.75rem;
+    margin-left: 0.5rem;
+}
+/* Search result */
+/* Results animate property changes over 0.2s and lift 2px on hover. */
+.search-result {
+    transition: all 0.2s ease;
+}
+.search-result:hover {
+    transform: translateY(-2px);
+}
+/* Metadata tags */
+/* Wrapping flex row of tags with a small gap between them. */
+.metadata-tags {
+    display: flex;
+    flex-wrap: wrap;
+    gap: 0.25rem;
+    margin-top: 0.5rem;
+}
+/* A single tag: tiny pill in the accent color whose text never wraps. */
+.metadata-tag {
+    font-size: 0.7rem;
+    padding: 0.1rem 0.4rem;
+    border-radius: 9999px;
+    background-color: var(--accent, #d8b4fe);
+    color: var(--accent-content, #581c87);
+    white-space: nowrap;
+}

app/static/js/index.js ADDED Viewed

	@@ -0,0 +1,268 @@

+// Index page functionality
+document.addEventListener('DOMContentLoaded', () => {
+    const youtubeUrlInput = document.getElementById('youtube-url');
+    const processButton = document.getElementById('process-button');
+    const processStatus = document.getElementById('process-status');
+    const processingIndicator = document.getElementById('processing');
+    const recentlyProcessedCard = document.getElementById('recently-processed');
+    const videoListContainer = document.getElementById('video-list');
+    // Example video buttons
+    const exampleButtons = document.querySelectorAll('.example-video');
+    // Process button click handler
+    processButton.addEventListener('click', () => processVideo());
+    // Enter key in input field
+    youtubeUrlInput.addEventListener('keypress', (e) => {
+        if (e.key === 'Enter') processVideo();
+    });
+    // Example video buttons
+    exampleButtons.forEach(button => {
+        button.addEventListener('click', () => {
+            youtubeUrlInput.value = button.dataset.url;
+            processVideo();
+        });
+    });
+    // Process video function
+    function processVideo() {
+        const youtubeUrl = youtubeUrlInput.value.trim();
+        if (!youtubeUrl) {
+            processStatus.innerHTML = '<div class="alert alert-warning">Please enter a YouTube URL</div>';
+            return;
+        }
+        // Extract video ID
+        const videoId = extractVideoId(youtubeUrl);
+        if (!videoId) {
+            processStatus.innerHTML = '<div class="alert alert-error">Invalid YouTube URL</div>';
+            return;
+        }
+        // Show loading indicator with spinner and text
+        processStatus.innerHTML = `
+            <div class="flex items-center justify-center my-4">
+                <span class="loading loading-spinner loading-md text-primary"></span>
+                <span class="ml-2">Processing video... This may take a few moments</span>
+            </div>
+        `;
+        // Set a timeout to handle overly long processing
+        const timeoutId = setTimeout(() => {
+            processStatus.innerHTML = `
+                <div class="alert alert-warning">
+                    <svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24">
+                        <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 9v2m0 4h.01m-6.938 4h13.856c1.54 0 2.502-1.667 1.732-3L13.732 4c-.77-1.333-2.694-1.333-3.464 0L3.34 16c-.77 1.333.192 3 1.732 3z" />
+                    </svg>
+                    <span>Processing is taking longer than expected. Please wait...</span>
+                </div>
+            `;
+        }, 20000); // 20 seconds
+        // Send request to process the video
+        fetch('/api/video/process', {
+            method: 'POST',
+            headers: {
+                'Content-Type': 'application/json'
+            },
+            body: JSON.stringify({ url: youtubeUrl })
+        })
+        .then(response => {
+            if (!response.ok) {
+                throw new Error('Failed to process video');
+            }
+            return response.json();
+        })
+        .then(data => {
+            // Clear timeout for long-running process
+            clearTimeout(timeoutId);
+            // Extract video ID from response (handles both old and new API formats)
+            const videoId = data.video ? data.video.video_id : data.video_id;
+            const isNewlyProcessed = data.newly_processed !== undefined ? data.newly_processed : true;
+            if (!videoId) {
+                throw new Error('Invalid response: Missing video ID');
+            }
+            // Get video title (for display)
+            const videoTitle = data.video ? data.video.title : (data.title || `Video ${videoId}`);
+            // Log for debugging
+            console.log('Process response:', {videoId, isNewlyProcessed, data});
+            // Show success message
+            processStatus.innerHTML = `
+                <div role="alert" class="alert alert-success">
+                    <svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24">
+                        <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 12l2 2 4-4m6 2a9 9 0 11-18 0 9 9 0 0118 0z" />
+                    </svg>
+                    <span>${isNewlyProcessed ? 'Video processed successfully!' : 'Video was already processed!'}</span>
+                    <div>
+                        <a href="/video/${videoId}" class="btn btn-sm btn-primary">
+                            <svg xmlns="http://www.w3.org/2000/svg" class="h-4 w-4 mr-1" fill="none" viewBox="0 0 24 24" stroke="currentColor">
+                                <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M14.752 11.168l-3.197-2.132A1 1 0 0010 9.87v4.263a1 1 0 001.555.832l3.197-2.132a1 1 0 000-1.664z" />
+                                <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
+                            </svg>
+                            Open Video
+                        </a>
+                    </div>
+                </div>
+            `;
+            // Update recent videos lists
+            displayRecentVideos();
+            loadFooterRecentVideos(); // Update footer videos as well
+        })
+        .catch(error => {
+            // Clear timeout for long-running process
+            clearTimeout(timeoutId);
+            // Show error message
+            console.error('Process error:', error);
+            processStatus.innerHTML = handleError(error);
+        });
+    }
+    // Display recently processed videos
+    function displayRecentVideos() {
+        // Show loading state
+        recentlyProcessedCard.classList.remove('hidden');
+        videoListContainer.innerHTML = `
+            <div class="flex justify-center items-center p-4">
+                <span class="loading loading-spinner loading-md"></span>
+                <span class="ml-2">Loading recent videos...</span>
+            </div>
+        `;
+        const carouselPrev = document.getElementById('carousel-prev');
+        const carouselNext = document.getElementById('carousel-next');
+        // Fetch recent videos from server
+        fetch('/api/video/recent?limit=5')
+            .then(response => {
+                if (!response.ok) {
+                    throw new Error('Failed to fetch recent videos');
+                }
+                return response.json();
+            })
+            .then(videos => {
+                if (videos && videos.length > 0) {
+                    // Limit to 5 videos
+                    const limitedVideos = videos.slice(0, 5);
+                    // Generate carousel items
+                    const carouselItems = limitedVideos.map((video, index) => {
+                        // Format date if available
+                        let formattedDate = '';
+                        if (video.created_at) {
+                            const date = new Date(video.created_at * 1000); // Convert Unix timestamp to milliseconds
+                            formattedDate = date.toLocaleDateString();
+                        }
+                        // Use title or default
+                        const videoTitle = video.title || `Video ${video.video_id}`;
+                        return `
+                            <div id="video-${index}" class="carousel-item">
+                                <a href="/video/${video.video_id}" class="card bg-base-100 shadow-sm hover:shadow-md transition-all w-64 md:w-72 flex flex-col">
+                                    <figure class="w-full h-36 overflow-hidden">
+                                        <img src="https://img.youtube.com/vi/${video.video_id}/mqdefault.jpg" alt="Thumbnail" class="w-full h-full object-cover">
+                                    </figure>
+                                    <div class="card-body p-3">
+                                        <h3 class="card-title text-sm line-clamp-2">${videoTitle}</h3>
+                                        <div class="text-xs opacity-70">${formattedDate}</div>
+                                    </div>
+                                </a>
+                            </div>
+                        `;
+                    }).join('');
+                    // Add carousel items to container
+                    videoListContainer.innerHTML = carouselItems;
+                    // Setup navigation arrows
+                    if (limitedVideos.length > 1) {
+                        // Show arrows for multiple videos
+                        let currentIndex = 0;
+                        const maxIndex = limitedVideos.length - 1;
+                        // Show navigation arrows
+                        carouselPrev.classList.remove('hidden');
+                        carouselNext.classList.remove('hidden');
+                        // Left button is disabled by default (we're at the start)
+                        const prevButton = carouselPrev.querySelector('button');
+                        const nextButton = carouselNext.querySelector('button');
+                        prevButton.classList.add('btn-disabled');
+                        // Functions to update button states
+                        const updateButtonStates = () => {
+                            if (currentIndex === 0) {
+                                prevButton.classList.add('btn-disabled');
+                            } else {
+                                prevButton.classList.remove('btn-disabled');
+                            }
+                            if (currentIndex === maxIndex) {
+                                nextButton.classList.add('btn-disabled');
+                            } else {
+                                nextButton.classList.remove('btn-disabled');
+                            }
+                        };
+                        // Setup navigation buttons
+                        prevButton.addEventListener('click', () => {
+                            if (currentIndex > 0) {
+                                currentIndex--;
+                                document.getElementById(`video-${currentIndex}`).scrollIntoView({
+                                    behavior: 'smooth',
+                                    block: 'nearest',
+                                    inline: 'center'
+                                });
+                                updateButtonStates();
+                            }
+                        });
+                        nextButton.addEventListener('click', () => {
+                            if (currentIndex < maxIndex) {
+                                currentIndex++;
+                                document.getElementById(`video-${currentIndex}`).scrollIntoView({
+                                    behavior: 'smooth',
+                                    block: 'nearest',
+                                    inline: 'center'
+                                });
+                                updateButtonStates();
+                            }
+                        });
+                    } else {
+                        // Hide arrows for single video
+                        carouselPrev.classList.add('hidden');
+                        carouselNext.classList.add('hidden');
+                    }
+                } else {
+                    recentlyProcessedCard.classList.add('hidden');
+                    carouselPrev.classList.add('hidden');
+                    carouselNext.classList.add('hidden');
+                }
+            })
+            .catch(error => {
+                console.error('Error fetching recent videos:', error);
+                videoListContainer.innerHTML = `
+                    <div class="alert alert-error">
+                        <svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24">
+                            <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 14l2-2m0 0l2-2m-2 2l-2-2m2 2l2 2m7-2a9 9 0 11-18 0 9 9 0 0118 0z" />
+                        </svg>
+                        <span>Failed to load recent videos</span>
+                    </div>
+                `;
+                carouselPrev.classList.add('hidden');
+                carouselNext.classList.add('hidden');
+            });
+    }
+    // Display recent videos on page load
+    displayRecentVideos();
+});

app/static/js/main.js ADDED Viewed

	@@ -0,0 +1,139 @@

+// Common functionality
+// Initialize on page load
+document.addEventListener('DOMContentLoaded', () => {
+    // Display recent videos in the footer on page load
+    loadFooterRecentVideos();
+    // Handle theme switching
+    const themeItems = document.querySelectorAll('.theme-item');
+    themeItems.forEach(item => {
+        item.addEventListener('click', () => {
+            const theme = item.dataset.theme;
+            document.documentElement.setAttribute('data-theme', theme);
+            localStorage.setItem('theme', theme);
+        });
+    });
+    // Apply saved theme from localStorage if available
+    const savedTheme = localStorage.getItem('theme');
+    if (savedTheme) {
+        document.documentElement.setAttribute('data-theme', savedTheme);
+    }
+});
+// Format seconds to MM:SS format
+function formatTime(seconds) {
+    const minutes = Math.floor(seconds / 60);
+    const secs = Math.floor(seconds % 60);
+    return `${minutes.toString().padStart(2, '0')}:${secs.toString().padStart(2, '0')}`;
+}
+// Error handling function
+function handleError(error) {
+    console.error('Error:', error);
+    return `<div role="alert" class="alert alert-error">
+        <svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24">
+            <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 14l2-2m0 0l2-2m-2 2l-2-2m2 2l2 2m7-2a9 9 0 11-18 0 9 9 0 0118 0z" />
+        </svg>
+        <span>Error: ${error.message || 'Something went wrong'}</span>
+        <div>
+            <button class="btn btn-sm btn-ghost" onclick="window.location.reload()">Retry</button>
+        </div>
+    </div>`;
+}
+// Toast notification function
+function showToast(message, type = 'info') {
+    const toast = document.createElement('div');
+    toast.className = `alert alert-${type} fixed bottom-4 right-4 max-w-xs z-50 shadow-lg`;
+    // Different icon based on type
+    let icon = '';
+    switch(type) {
+        case 'success':
+            icon = `<svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24">
+                    <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 12l2 2 4-4m6 2a9 9 0 11-18 0 9 9 0 0118 0z" />
+                </svg>`;
+            break;
+        case 'warning':
+            icon = `<svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24">
+                    <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 9v2m0 4h.01m-6.938 4h13.856c1.54 0 2.502-1.667 1.732-3L13.732 4c-.77-1.333-2.694-1.333-3.464 0L3.34 16c-.77 1.333.192 3 1.732 3z" />
+                </svg>`;
+            break;
+        case 'error':
+            icon = `<svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24">
+                    <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 14l2-2m0 0l2-2m-2 2l-2-2m2 2l2 2m7-2a9 9 0 11-18 0 9 9 0 0118 0z" />
+                </svg>`;
+            break;
+        default: // info
+            icon = `<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" class="stroke-current shrink-0 w-6 h-6">
+                    <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"></path>
+                </svg>`;
+    }
+    toast.innerHTML = `
+        ${icon}
+        <span>${message}</span>
+        <div>
+            <button class="btn btn-sm btn-ghost" onclick="this.parentElement.parentElement.remove()">
+                <svg xmlns="http://www.w3.org/2000/svg" class="h-4 w-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
+                    <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M6 18L18 6M6 6l12 12" />
+                </svg>
+            </button>
+        </div>
+    `;
+    document.body.appendChild(toast);
+    // Auto-dismiss after 3 seconds
+    setTimeout(() => {
+        toast.classList.add('opacity-0', 'transition-opacity', 'duration-500');
+        setTimeout(() => toast.remove(), 500);
+    }, 3000);
+}
+// Extract video ID from YouTube URL
+function extractVideoId(url) {
+    const regExp = /^.*((youtu.be\/)|(v\/)|(\/u\/\w\/)|(embed\/)|(watch\?))\??v?=?([^#&?]*).*/;
+    const match = url.match(regExp);
+    return (match && match[7].length === 11) ? match[7] : null;
+}
+// Load recent videos into the footer from the API
+function loadFooterRecentVideos() {
+    const footerRecentVideos = document.getElementById('footer-recent-videos');
+    if (!footerRecentVideos) return;
+    // Show loading state
+    footerRecentVideos.innerHTML = '<p class="text-sm opacity-70">Loading recent videos...</p>';
+    // Fetch recent videos from server API
+    fetch('/api/video/recent?limit=3')
+        .then(response => {
+            if (!response.ok) {
+                throw new Error('Failed to fetch recent videos');
+            }
+            return response.json();
+        })
+        .then(videos => {
+            if (videos && videos.length > 0) {
+                // Generate HTML for recent videos
+                const videoLinks = videos.map(video => {
+                    return `
+                        <a href="/video/${video.video_id}" class="link link-hover block py-1 truncate">
+                            <span class="text-xs text-primary">▶</span> ${video.title || `Video ${video.video_id}`}
+                        </a>
+                    `;
+                }).join('');
+                // Add videos to the footer
+                footerRecentVideos.innerHTML = videoLinks;
+            } else {
+                footerRecentVideos.innerHTML = '<p class="text-sm opacity-70">No recent videos</p>';
+            }
+        })
+        .catch(error => {
+            console.error('Error loading footer videos:', error);
+            footerRecentVideos.innerHTML = '<p class="text-sm opacity-70">Failed to load recent videos</p>';
+        });
+}

app/static/js/video.js ADDED Viewed

	@@ -0,0 +1,440 @@

+// Video page functionality
+document.addEventListener('DOMContentLoaded', () => {
+    const playerElement = document.getElementById('youtube-player');
+    const searchInput = document.getElementById('search-input');
+    const searchButton = document.getElementById('search-button');
+    const transcriptContainer = document.getElementById('transcript-container');
+    const loadingIndicator = document.getElementById('loading');
+    const toggleTranscriptButton = document.getElementById('toggle-transcript');
+    let transcriptSegments = [];
+    let ytPlayer = null;
+    let isProcessingUrl = false;
+    // Check if there's a search query in the URL
+    const urlParams = new URLSearchParams(window.location.search);
+    const searchQuery = urlParams.get('q');
+    const processingUrl = urlParams.get('processing');
+    // Format time to display as HH:MM:SS
+    function formatTime(seconds) {
+        const hours = Math.floor(seconds / 3600);
+        const mins = Math.floor((seconds % 3600) / 60);
+        const secs = Math.floor(seconds % 60);
+        if (hours > 0) {
+            return `${hours}:${mins.toString().padStart(2, '0')}:${secs.toString().padStart(2, '0')}`;
+        } else {
+            return `${mins}:${secs.toString().padStart(2, '0')}`;
+        }
+    }
+    // Handle error display
+    function handleError(error) {
+        console.error(error);
+        return `<div class="alert alert-error">Error: ${error.message}</div>`;
+    }
+    // Initialize YouTube iframe API
+    function initYouTubePlayer() {
+        // Get the existing iframe
+        const iframeId = playerElement.getAttribute('id');
+        // Load the YouTube iframe API if it's not already loaded
+        if (!window.YT) {
+            const tag = document.createElement('script');
+            tag.src = 'https://www.youtube.com/iframe_api';
+            const firstScriptTag = document.getElementsByTagName('script')[0];
+            firstScriptTag.parentNode.insertBefore(tag, firstScriptTag);
+            window.onYouTubeIframeAPIReady = function() {
+                createYouTubePlayer(iframeId);
+            };
+        } else {
+            createYouTubePlayer(iframeId);
+        }
+    }
+    // Create YouTube player object
+    function createYouTubePlayer(iframeId) {
+        ytPlayer = new YT.Player(iframeId, {
+            events: {
+                'onReady': onPlayerReady
+            }
+        });
+    }
+    // When player is ready
+    function onPlayerReady(event) {
+        console.log('Player ready');
+    }
+    // Load transcript segments
+    function loadTranscript() {
+        transcriptContainer.innerHTML = '<div class="flex justify-center my-4"><span class="loading loading-spinner loading-md"></span><span class="ml-2">Loading transcript...</span></div>';
+        // Check if video ID is valid before making API call
+        if (!videoId || videoId === 'undefined' || videoId === 'null') {
+            transcriptContainer.innerHTML = `
+                <div class="alert alert-error">
+                    <div>
+                        <svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 14l2-2m0 0l2-2m-2 2l-2-2m2 2l2 2m7-2a9 9 0 11-18 0 9 9 0 0118 0z" /></svg>
+                        <span>Invalid video ID. Please return to the home page and select a valid video.</span>
+                    </div>
+                </div>
+            `;
+            return;
+        }
+        fetch(`/api/video/segments/${videoId}`)
+            .then(response => {
+                if (!response.ok) {
+                    throw new Error('Failed to load transcript: ' + response.status);
+                }
+                return response.json();
+            })
+            .then(segments => {
+                transcriptSegments = segments;
+                if (!segments || segments.length === 0) {
+                    transcriptContainer.innerHTML = `
+                        <div class="alert alert-info">
+                            <div>
+                                <svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" class="stroke-current shrink-0 w-6 h-6"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"></path></svg>
+                                <span>No transcript available for this video. Try processing the video first from the home page.</span>
+                            </div>
+                        </div>
+                    `;
+                } else {
+                    displayTranscript(segments);
+                }
+            })
+            .catch(error => {
+                console.error('Error loading transcript:', error);
+                transcriptContainer.innerHTML = `
+                    <div class="alert alert-error">
+                        <div>
+                            <svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 14l2-2m0 0l2-2m-2 2l-2-2m2 2l2 2m7-2a9 9 0 11-18 0 9 9 0 0118 0z" /></svg>
+                            <span>Error loading transcript: ${error.message}</span>
+                        </div>
+                    </div>
+                    <p class="mt-4">This may happen if:</p>
+                    <ul class="list-disc ml-8 mt-2">
+                        <li>The video hasn't been processed yet</li>
+                        <li>The video ID is incorrect</li>
+                        <li>The server is experiencing issues</li>
+                    </ul>
+                    <p class="mt-4">Try processing this video from the home page first.</p>
+                `;
+            });
+    }
+    // Display transcript segments
+    function displayTranscript(segments) {
+        const html = segments.map((segment, index) => {
+            const formattedTime = formatTime(segment.start);
+            return `
+                <div class="transcript-segment" data-start="${segment.start}" data-end="${segment.end}" data-index="${index}">
+                    <span class="timestamp">${formattedTime}</span>
+                    <span class="segment-text">${segment.text}</span>
+                </div>
+            `;
+        }).join('');
+        transcriptContainer.innerHTML = html;
+        // Add click handlers to segments
+        document.querySelectorAll('.transcript-segment').forEach(segment => {
+            segment.addEventListener('click', () => {
+                const startTime = parseFloat(segment.dataset.start);
+                seekToTime(startTime);
+            });
+        });
+    }
+    // Seek to specific time in the video
+    function seekToTime(seconds) {
+        console.log('Seeking to time:', seconds);
+        if (ytPlayer && typeof ytPlayer.seekTo === 'function') {
+            try {
+                // Ensure seconds is a number
+                seconds = parseFloat(seconds);
+                // Seek to time
+                ytPlayer.seekTo(seconds, true);
+                // Try to play the video (may be blocked by browser autoplay policies)
+                try {
+                    ytPlayer.playVideo();
+                } catch (e) {
+                    console.warn('Could not autoplay video:', e);
+                }
+                // Highlight the current segment
+                highlightSegment(seconds);
+            } catch (error) {
+                console.error('Error seeking to time:', error);
+            }
+        } else {
+            console.error('YouTube player is not ready yet or seekTo method is not available');
+        }
+    }
+    // Highlight segment containing the current time
+    function highlightSegment(time) {
+        // Remove highlight from all segments
+        document.querySelectorAll('.transcript-segment').forEach(segment => {
+            segment.classList.remove('highlight');
+        });
+        // Find the segment containing current time
+        // Need to find by approximate match since floating point exact matches may not work
+        const segments = document.querySelectorAll('.transcript-segment');
+        let currentSegment = null;
+        for (const segment of segments) {
+            const start = parseFloat(segment.dataset.start);
+            const end = parseFloat(segment.dataset.end);
+            if (time >= start && time <= end) {
+                currentSegment = segment;
+                break;
+            }
+        }
+        // If exact time match not found, find the closest segment
+        if (!currentSegment) {
+            const exactMatch = document.querySelector(`.transcript-segment[data-start="${time}"]`);
+            if (exactMatch) {
+                currentSegment = exactMatch;
+            }
+        }
+        if (currentSegment) {
+            currentSegment.classList.add('highlight');
+            currentSegment.scrollIntoView({ behavior: 'smooth', block: 'center' });
+        }
+    }
+    // Search functionality
+    // The button click and the Enter key in the search box both run performSearch.
+    searchButton.addEventListener('click', performSearch);
+    searchInput.addEventListener('keypress', e => {
+        // Only Enter triggers a search; every other key is left alone.
+        if (e.key === 'Enter') performSearch();
+    });
+    function performSearch() {
+        const query = searchInput.value.trim();
+        if (!query) {
+            transcriptContainer.innerHTML = '<div class="alert alert-warning">Please enter a search query</div>';
+            return;
+        }
+        // Validate video ID before searching
+        if (!videoId || videoId === 'undefined' || videoId === 'null') {
+            transcriptContainer.innerHTML = `
+                <div class="alert alert-error">
+                    <div>
+                        <svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 14l2-2m0 0l2-2m-2 2l-2-2m2 2l2 2m7-2a9 9 0 11-18 0 9 9 0 0118 0z" /></svg>
+                        <span>Invalid video ID. Please return to the home page and select a valid video.</span>
+                    </div>
+                </div>
+            `;
+            return;
+        }
+        // Show loading indicator
+        loadingIndicator.classList.remove('hidden');
+        // Send search request
+        fetch(`/api/video/search?query=${encodeURIComponent(query)}&video_id=${videoId}`)
+            .then(response => {
+                if (!response.ok) {
+                    throw new Error('Search failed');
+                }
+                return response.json();
+            })
+            .then(results => {
+                // Hide loading indicator
+                loadingIndicator.classList.add('hidden');
+                if (results.length === 0) {
+                    // Show "no results" message in transcript container
+                    transcriptContainer.innerHTML = `
+                        <div role="alert" class="alert alert-info">
+                            <svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" class="stroke-current shrink-0 w-6 h-6">
+                                <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"></path>
+                            </svg>
+                            <span>No results found for "${query}". <a href="#" id="reset-search" class="link link-primary">Show all transcript</a></span>
+                        </div>`;
+                    // Add click handler to reset search link
+                    document.getElementById('reset-search').addEventListener('click', (e) => {
+                        e.preventDefault();
+                        resetTranscriptFilter();
+                        displayTranscript(transcriptSegments);
+                    });
+                    return;
+                }
+                // Display search results as filtered transcript
+                filterTranscript(results);
+                // Add a header with search info and reset option
+                const searchInfoHeader = document.createElement('div');
+                searchInfoHeader.className = 'mb-4 flex justify-between items-center';
+                searchInfoHeader.innerHTML = `
+                    <div class="badge badge-accent">${results.length} results for "${query}"</div>
+                    <a href="#" id="reset-search" class="link link-primary text-sm">Show all transcript</a>
+                `;
+                // Insert the header before transcript segments
+                transcriptContainer.insertBefore(searchInfoHeader, transcriptContainer.firstChild);
+                // Add click handler to reset search link
+                document.getElementById('reset-search').addEventListener('click', (e) => {
+                    e.preventDefault();
+                    resetTranscriptFilter();
+                    displayTranscript(transcriptSegments);
+                });
+            })
+            .catch(error => {
+                // Hide loading indicator
+                loadingIndicator.classList.add('hidden');
+                // Show error
+                transcriptContainer.innerHTML = handleError(error);
+            });
+    }
+    // Filter transcript to show only matching segments
+    function filterTranscript(results) {
+        // Create a highlighted version of the transcript with only matching segments
+        const html = results.map(result => {
+            const segment = result.segment;
+            const formattedTime = formatTime(segment.start);
+            const score = (result.score * 100).toFixed(0);
+            const index = transcriptSegments.findIndex(s => s.segment_id === segment.segment_id);
+            return `
+                <div class="transcript-segment search-result" data-start="${segment.start}" data-end="${segment.end}" data-index="${index}">
+                    <div class="flex justify-between items-center">
+                        <span class="timestamp">${formattedTime}</span>
+                        <div class="badge badge-primary">${score}% match</div>
+                    </div>
+                    <span class="segment-text mt-1">${segment.text}</span>
+                </div>
+            `;
+        }).join('');
+        // Replace transcript with filtered results
+        transcriptContainer.innerHTML = html;
+        // Add click handlers to segments
+        document.querySelectorAll('.transcript-segment').forEach(segment => {
+            segment.addEventListener('click', () => {
+                const startTime = parseFloat(segment.dataset.start);
+                seekToTime(startTime);
+            });
+        });
+    }
+    // Transcript is always visible - toggle functionality removed
+    // Reset transcript filter to show all segments
+    function resetTranscriptFilter() {
+        // Only clears the search box. Re-rendering the full transcript is left to
+        // the caller (the reset links call displayTranscript right afterwards).
+        searchInput.value = '';
+    }
+    // Show processing indicator if URL was just processed
+    function showProcessingIndicator() {
+        if (processingUrl === 'true') {
+            isProcessingUrl = true;
+            transcriptContainer.innerHTML = `
+                <div class="flex items-center justify-center my-4">
+                    <span class="loading loading-spinner loading-md text-primary"></span>
+                    <span class="ml-2">Processing video from URL... This may take a few moments</span>
+                </div>
+            `;
+            // Check for segments every second
+            const processingInterval = setInterval(() => {
+                // Validate video ID before making API call
+                if (!videoId || videoId === 'undefined' || videoId === 'null') {
+                    clearInterval(processingInterval);
+                    transcriptContainer.innerHTML = `
+                        <div class="alert alert-error">
+                            <div>
+                                <svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 14l2-2m0 0l2-2m-2 2l-2-2m2 2l2 2m7-2a9 9 0 11-18 0 9 9 0 0118 0z" /></svg>
+                                <span>Invalid video ID. Please return to the home page and select a valid video.</span>
+                            </div>
+                        </div>
+                    `;
+                    return;
+                }
+                fetch(`/api/video/segments/${videoId}`)
+                    .then(response => {
+                        if (!response.ok) {
+                            return null;
+                        }
+                        return response.json();
+                    })
+                    .then(segments => {
+                        if (segments && segments.length > 0) {
+                            clearInterval(processingInterval);
+                            isProcessingUrl = false;
+                            loadTranscript();
+                        }
+                    })
+                    .catch(error => {
+                        console.error('Error checking segments:', error);
+                    });
+            }, 2000);
+            // Set timeout to stop checking after 2 minutes
+            setTimeout(() => {
+                clearInterval(processingInterval);
+                if (isProcessingUrl) {
+                    transcriptContainer.innerHTML = `
+                        <div class="alert alert-warning">
+                            <div>
+                                <svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" class="stroke-current shrink-0 w-6 h-6"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"></path></svg>
+                                <span>Processing is taking longer than expected. Refresh the page to check progress.</span>
+                            </div>
+                        </div>
+                    `;
+                    isProcessingUrl = false;
+                }
+            }, 120000);
+            return true;
+        }
+        return false;
+    }
+    // Initialize
+    initYouTubePlayer();
+    // Show processing indicator or load transcript
+    if (!showProcessingIndicator()) {
+        loadTranscript();
+    }
+    // If there's a search query in the URL, apply it after transcript loads
+    if (searchQuery) {
+        const checkTranscriptInterval = setInterval(() => {
+            if (transcriptSegments.length > 0) {
+                clearInterval(checkTranscriptInterval);
+                // Set the search input value and trigger search
+                searchInput.value = searchQuery;
+                performSearch();
+            }
+        }, 500);
+        // Set timeout to stop checking after 10 seconds
+        setTimeout(() => clearInterval(checkTranscriptInterval), 10000);
+    }
+});

app/templates/base.html ADDED Viewed

	@@ -0,0 +1,82 @@

+<!DOCTYPE html>
+<html lang="en" data-theme="light">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>{{ title }}</title>
+    <link href="https://cdn.jsdelivr.net/npm/daisyui@4/dist/full.css" rel="stylesheet" type="text/css" />
+    <script src="https://cdn.tailwindcss.com"></script>
+    <link rel="stylesheet" href="{{ url_for('static', path='/css/style.css') }}">
+</head>
+<body class="min-h-screen flex flex-col">
+    <!-- Header/Navbar -->
+    <div class="navbar bg-base-200 shadow-md">
+        <div class="navbar-start">
+            <a href="/" class="btn btn-ghost text-xl">
+                <svg xmlns="http://www.w3.org/2000/svg" class="h-6 w-6 mr-2" fill="none" viewBox="0 0 24 24" stroke="currentColor">
+                    <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M15 10l4.553-2.276A1 1 0 0121 8.618v6.764a1 1 0 01-1.447.894L15 14M5 18h8a2 2 0 002-2V8a2 2 0 00-2-2H5a2 2 0 00-2 2v8a2 2 0 002 2z" />
+                </svg>
+                In-Video Search
+            </a>
+        </div>
+        <div class="navbar-center">
+            <div class="form-control">
+                <div class="join">
+                    <input type="text" id="global-search" placeholder="Search videos..." class="input input-bordered join-item w-full md:w-96" />
+                    <button id="global-search-button" class="btn btn-primary join-item">
+                        <svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5" fill="none" viewBox="0 0 24 24" stroke="currentColor">
+                            <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M21 21l-6-6m2-5a7 7 0 11-14 0 7 7 0 0114 0z" />
+                        </svg>
+                    </button>
+                </div>
+            </div>
+        </div>
+        <div class="navbar-end">
+            <div class="dropdown dropdown-end">
+                <div tabindex="0" role="button" class="btn btn-ghost btn-circle">
+                    <svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5" fill="none" viewBox="0 0 24 24" stroke="currentColor">
+                        <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 3v1m0 16v1m9-9h-1M4 12H3m15.364 6.364l-.707-.707M6.343 6.343l-.707-.707m12.728 0l-.707.707M6.343 17.657l-.707.707M16 12a4 4 0 11-8 0 4 4 0 018 0z" />
+                    </svg>
+                </div>
+                <ul tabindex="0" class="dropdown-content z-[1] menu p-2 shadow bg-base-100 rounded-box w-52">
+                    <li><button class="theme-item" data-theme="light">Light</button></li>
+                    <li><button class="theme-item" data-theme="dark">Dark</button></li>
+                    <li><button class="theme-item" data-theme="cupcake">Cupcake</button></li>
+                    <li><button class="theme-item" data-theme="synthwave">Synthwave</button></li>
+                </ul>
+            </div>
+        </div>
+    </div>
+    <!-- Main Content -->
+    <main class="container mx-auto px-4 py-8 flex-grow">
+        {% block content %}{% endblock %}
+    </main>
+    <!-- Footer -->
+    <footer class="footer p-10 bg-base-200 text-base-content">
+        <div>
+            <span class="footer-title">In-Video Search</span>
+            <p>Powered by Qdrant & FastAPI</p>
+            <p>Search through video content semantically</p>
+        </div>
+        <div>
+            <span class="footer-title">Recent Videos</span>
+            <div id="footer-recent-videos">
+                <!-- Recent videos will be loaded here by JavaScript -->
+                <p class="text-sm opacity-70">No recent videos</p>
+            </div>
+        </div>
+        <div>
+            <span class="footer-title">Resources</span>
+            <a class="link link-hover" href="https://qdrant.tech/" target="_blank">Qdrant</a>
+            <a class="link link-hover" href="https://fastapi.tiangolo.com/" target="_blank">FastAPI</a>
+            <a class="link link-hover" href="https://daisyui.com/" target="_blank">DaisyUI</a>
+        </div>
+    </footer>
+    <!-- Scripts -->
+    <script src="{{ url_for('static', path='/js/main.js') }}"></script>
+    {% block scripts %}{% endblock %}
+</body>
+</html>

app/templates/index.html ADDED Viewed

	@@ -0,0 +1,98 @@

+{% extends "base.html" %}
+{% block content %}
+<div class="max-w-4xl mx-auto">
+    <div class="card bg-base-100 shadow-xl">
+        <div class="card-body">
+            <h2 class="card-title">Process YouTube Video</h2>
+            <p class="text-gray-600 mb-4">Enter a YouTube URL to process its transcript for searching</p>
+            <div class="form-control">
+                <label class="label">
+                    <span class="label-text">Enter YouTube URL</span>
+                </label>
+                <div class="join w-full">
+                    <input type="text" id="youtube-url" placeholder="https://www.youtube.com/watch?v=..." class="input input-bordered join-item w-full" />
+                    <button id="process-button" class="btn btn-primary join-item">Process</button>
+                </div>
+            </div>
+            <div class="mt-4" id="process-status">
+                <!-- Processing status messages will appear here -->
+            </div>
+            <div class="divider">OR</div>
+            <h3 class="font-bold mb-2">Example Videos</h3>
+            <div class="grid grid-cols-1 md:grid-cols-3 gap-2">
+                <button class="btn btn-outline btn-accent btn-sm example-video w-full h-auto" data-url="https://www.youtube.com/watch?v=zjkBMFhNj_g">
+                    <div class="flex items-center w-full">
+                        <svg xmlns="http://www.w3.org/2000/svg" class="h-4 w-4 mr-2 flex-shrink-0" fill="none" viewBox="0 0 24 24" stroke="currentColor">
+                            <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M14.752 11.168l-3.197-2.132A1 1 0 0010 9.87v4.263a1 1 0 001.555.832l3.197-2.132a1 1 0 000-1.664z" />
+                            <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
+                        </svg>
+                        <span class="truncate text-left">Intro to Large Language Models</span>
+                    </div>
+                </button>
+                <button class="btn btn-outline btn-accent btn-sm example-video w-full h-auto" data-url="https://www.youtube.com/watch?v=7xTGNNLPyMI">
+                    <div class="flex items-center w-full">
+                        <svg xmlns="http://www.w3.org/2000/svg" class="h-4 w-4 mr-2 flex-shrink-0" fill="none" viewBox="0 0 24 24" stroke="currentColor">
+                            <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M14.752 11.168l-3.197-2.132A1 1 0 0010 9.87v4.263a1 1 0 001.555.832l3.197-2.132a1 1 0 000-1.664z" />
+                            <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
+                        </svg>
+                        <span class="truncate text-left">Deep Dive into LLMs like ChatGPT</span>
+                    </div>
+                </button>
+                <button class="btn btn-outline btn-accent btn-sm example-video w-full h-auto" data-url="https://www.youtube.com/watch?v=EWvNQjAaOHw">
+                    <div class="flex items-center w-full">
+                        <svg xmlns="http://www.w3.org/2000/svg" class="h-4 w-4 mr-2 flex-shrink-0" fill="none" viewBox="0 0 24 24" stroke="currentColor">
+                            <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M14.752 11.168l-3.197-2.132A1 1 0 0010 9.87v4.263a1 1 0 001.555.832l3.197-2.132a1 1 0 000-1.664z" />
+                            <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
+                        </svg>
+                        <span class="truncate text-left">How I use LLMs</span>
+                    </div>
+                </button>
+            </div>
+        </div>
+    </div>
+    <div class="card bg-base-100 shadow-xl mt-6 hidden" id="recently-processed">
+        <div class="card-body">
+            <h2 class="card-title">Recently Processed Videos</h2>
+            <div class="mt-4">
+                <!-- Video carousel with navigation arrows -->
+                <div class="flex items-center gap-2">
+                    <!-- Left arrow navigation -->
+                    <div class="hidden md:block" id="carousel-prev">
+                        <button class="btn btn-circle btn-primary btn-disabled">
+                            <svg xmlns="http://www.w3.org/2000/svg" class="h-6 w-6" fill="none" viewBox="0 0 24 24" stroke="currentColor">
+                                <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M15 19l-7-7 7-7" />
+                            </svg>
+                        </button>
+                    </div>
+                    <!-- Carousel content -->
+                    <div class="carousel carousel-center rounded-box w-full p-2 overflow-x-auto">
+                        <div id="video-list" class="flex space-x-4 items-stretch">
+                            <!-- Video cards will be populated here as carousel items -->
+                        </div>
+                    </div>
+                    <!-- Right arrow navigation -->
+                    <div class="hidden md:block" id="carousel-next">
+                        <button class="btn btn-circle btn-primary">
+                            <svg xmlns="http://www.w3.org/2000/svg" class="h-6 w-6" fill="none" viewBox="0 0 24 24" stroke="currentColor">
+                                <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 5l7 7-7 7" />
+                            </svg>
+                        </button>
+                    </div>
+                </div>
+            </div>
+        </div>
+    </div>
+</div>
+{% endblock %}
+{% block scripts %}
+<script src="{{ url_for('static', path='/js/index.js') }}"></script>
+{% endblock %}

app/templates/video.html ADDED Viewed

	@@ -0,0 +1,62 @@

+{% extends "base.html" %}
+{% block content %}
+<div class="grid grid-cols-1 lg:grid-cols-2 gap-6">
+    <div class="lg:col-span-1">
+        <div class="card bg-base-100 shadow-xl">
+            <div class="card-body p-4">
+                <div class="aspect-video">
+                    <iframe id="youtube-player" class="w-full h-full"
+                        src="https://www.youtube.com/embed/{{ video_id }}?enablejsapi=1"
+                        frameborder="0"
+                        allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture"
+                        allowfullscreen>
+                    </iframe>
+                </div>
+            </div>
+        </div>
+    </div>
+    <div class="lg:col-span-1">
+        <div class="card bg-base-100 shadow-xl sticky top-4">
+            <div class="card-body">
+                <div class="flex justify-between items-center">
+                    <h2 class="card-title">Video Transcript</h2>
+                </div>
+                <div class="form-control mb-4">
+                    <label class="label">
+                        <span class="label-text">Search in transcript</span>
+                    </label>
+                    <div class="join w-full">
+                        <input type="text" id="search-input" placeholder="Search in this video..." class="input input-bordered join-item w-full" />
+                        <button id="search-button" class="btn btn-primary join-item">
+                            <svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5" fill="none" viewBox="0 0 24 24" stroke="currentColor">
+                                <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M21 21l-6-6m2-5a7 7 0 11-14 0 7 7 0 0114 0z" />
+                            </svg>
+                            Search
+                        </button>
+                    </div>
+                </div>
+                <div id="loading" class="hidden mt-2 mb-2">
+                    <span class="loading loading-spinner loading-md"></span>
+                    <span class="ml-2">Searching...</span>
+                </div>
+                <div id="transcript-container" class="mt-2 transcript-container">
+                    <!-- Transcript will be loaded here -->
+                </div>
+            </div>
+        </div>
+    </div>
+</div>
+{% endblock %}
+{% block scripts %}
+<script>
+    // Store the video ID in a JavaScript variable.
+    // `tojson` renders a correctly quoted, script-safe JavaScript literal; HTML
+    // autoescaping alone does not escape for a JS string context (for example a
+    // backslash), and a missing ID becomes `null` instead of a text placeholder.
+    const videoId = {{ video_id | tojson }};
+</script>
+<script src="{{ url_for('static', path='/js/video.js') }}"></script>
+{% endblock %}

docker-compose.yml ADDED Viewed

	@@ -0,0 +1,28 @@

+# Two-service stack: the web app plus the Qdrant instance it connects to.
+# NOTE(review): the top-level `version` key is treated as obsolete by current
+# Docker Compose releases - confirm whether it can be dropped.
+version: '3.8'
+services:
+  app:
+    # Image is built from the Dockerfile in this directory.
+    build: .
+    ports:
+      - "8000:8000"
+    environment:
+      # `qdrant` resolves to the service defined below on the Compose network.
+      - QDRANT_URL=http://qdrant:6333
+      - WORKERS=4  # Set number of workers
+      # - QDRANT_API_KEY=your_api_key_here (uncomment and set if needed)
+    # Controls start order only; it does not wait for Qdrant to accept requests.
+    depends_on:
+      - qdrant
+    restart: unless-stopped
+    # NOTE(review): this check needs `curl` inside the app image - confirm the
+    # Dockerfile installs it, otherwise the container is reported unhealthy.
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8000/"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 40s
+  qdrant:
+    # No ports are published, so Qdrant is reachable only from other services.
+    image: qdrant/qdrant:v1.13.5
+    volumes:
+      # Named volume keeps the stored data across container re-creation.
+      - qdrant_data:/qdrant/storage
+volumes:
+  qdrant_data:

example.env ADDED Viewed

	@@ -0,0 +1,3 @@

+# Qdrant Configuration
+QDRANT_URL=http://localhost:6333
+QDRANT_API_KEY=

gunicorn.conf.py ADDED Viewed

	@@ -0,0 +1,32 @@

+# Gunicorn settings for the app; every module-level name below is a setting.
+import os
+import multiprocessing
+# Get the number of workers from environment variable or calculate based on CPU cores
+workers_env = os.getenv("WORKERS")
+if workers_env:
+    # A non-numeric WORKERS value raises ValueError here, at startup.
+    workers = int(workers_env)
+else:
+    # Use the recommended formula: (2 * CPU cores) + 1
+    workers = (2 * multiprocessing.cpu_count()) + 1
+# Use Uvicorn worker class for ASGI support
+worker_class = "uvicorn.workers.UvicornWorker"
+# Bind to 0.0.0.0:8000
+bind = "0.0.0.0:8000"
+# Logging
+accesslog = "-"  # Log to stdout
+errorlog = "-"   # Log to stderr
+loglevel = "info"
+# Timeout configuration (seconds)
+timeout = 120  # 2 minutes
+graceful_timeout = 30  # Time allowed for workers to finish during a graceful restart
+# Worker settings
+# NOTE(review): Gunicorn documents worker_connections for the gthread, eventlet
+# and gevent workers - confirm it has any effect with UvicornWorker.
+worker_connections = 1000  # Maximum number of connections each worker can handle
+keepalive = 5  # Seconds to wait between client requests before closing connection
+# Base name for the process title (Gunicorn uses it with setproctitle); this is
+# for identifying the processes, not a performance setting.
+proc_name = "vibe-coding-rag"

poetry.lock CHANGED Viewed

The diff for this file is too large to render. See raw diff

pyproject.toml CHANGED Viewed

@@ -7,16 +7,27 @@ readme = "README.md"
 package-mode = false
 [tool.poetry.dependencies]
-python = "^3.10"
 torch = {version = "^2.6.0+cpu", source = "pytorch-cpu"}
 sentence-transformers = "^3.4.1"
 qdrant-client = "^1.13.3"
 [[tool.poetry.source]]
 name = "pytorch-cpu"
 url = "https://download.pytorch.org/whl/cpu"
 priority = "explicit"
 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"

 package-mode = false
 [tool.poetry.dependencies]
+python = "^3.10,<3.14"
 torch = {version = "^2.6.0+cpu", source = "pytorch-cpu"}
 sentence-transformers = "^3.4.1"
 qdrant-client = "^1.13.3"
+fastapi = "^0.115.11"
+uvicorn = "^0.34.0"
+gunicorn = "^21.2.0"
+jinja2 = "^3.1.6"
+youtube-transcript-api = "^1.0.2"
+pytube = "^15.0.0"
+yt-dlp = "^2025.2.19"
 [[tool.poetry.source]]
 name = "pytorch-cpu"
 url = "https://download.pytorch.org/whl/cpu"
 priority = "explicit"
+[tool.poetry.group.dev.dependencies]
+ruff = "^0.11.0"
 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"