Spaces:
Running
Running
Commit
·
74cf6bd
1
Parent(s):
958cc77
Vibe coded implementation (with some manual fixes)
Browse files(cherry picked from commit 255acda8c8bcb989fd72006b84dee18553468356)
- .dockerignore +49 -0
- CLAUDE.md +33 -0
- Dockerfile +41 -0
- app/__init__.py +1 -0
- app/api/__init__.py +1 -0
- app/api/router.py +6 -0
- app/api/video.py +143 -0
- app/main.py +63 -0
- app/models/__init__.py +1 -0
- app/models/video.py +32 -0
- app/services/__init__.py +1 -0
- app/services/qdrant_service.py +41 -0
- app/services/video_service.py +647 -0
- app/static/css/style.css +137 -0
- app/static/js/index.js +268 -0
- app/static/js/main.js +139 -0
- app/static/js/video.js +440 -0
- app/templates/base.html +82 -0
- app/templates/index.html +98 -0
- app/templates/video.html +62 -0
- docker-compose.yml +28 -0
- example.env +3 -0
- gunicorn.conf.py +32 -0
- poetry.lock +0 -0
- pyproject.toml +12 -1
.dockerignore
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Git
|
2 |
+
.git
|
3 |
+
.gitignore
|
4 |
+
|
5 |
+
# Python
|
6 |
+
__pycache__/
|
7 |
+
*.py[cod]
|
8 |
+
*$py.class
|
9 |
+
*.so
|
10 |
+
.Python
|
11 |
+
env/
|
12 |
+
build/
|
13 |
+
develop-eggs/
|
14 |
+
dist/
|
15 |
+
downloads/
|
16 |
+
eggs/
|
17 |
+
.eggs/
|
18 |
+
lib/
|
19 |
+
lib64/
|
20 |
+
parts/
|
21 |
+
sdist/
|
22 |
+
var/
|
23 |
+
*.egg-info/
|
24 |
+
.installed.cfg
|
25 |
+
*.egg
|
26 |
+
|
27 |
+
# Virtual environment
|
28 |
+
venv/
|
29 |
+
.env
|
30 |
+
.venv/
|
31 |
+
ENV/
|
32 |
+
|
33 |
+
# Docker
|
34 |
+
.dockerignore
|
35 |
+
Dockerfile
|
36 |
+
docker-compose.yml
|
37 |
+
|
38 |
+
# IDE
|
39 |
+
.idea/
|
40 |
+
.vscode/
|
41 |
+
*.swp
|
42 |
+
*.swo
|
43 |
+
|
44 |
+
# Misc
|
45 |
+
.DS_Store
|
46 |
+
.pytest_cache/
|
47 |
+
htmlcov/
|
48 |
+
.coverage
|
49 |
+
.tox/
|
CLAUDE.md
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Development Guidelines for Vibe Coding RAG
|
2 |
+
|
3 |
+
## Commands
|
4 |
+
- Build/Install: `poetry install`
|
5 |
+
- Run: `poetry run python -m app.main`
|
6 |
+
- Lint: `poetry run ruff check .`
|
7 |
+
- Format: `poetry run ruff format .`
|
8 |
+
- Test: `poetry run pytest`
|
9 |
+
- Run single test: `poetry run pytest path/to/test.py::test_function_name -v`
|
10 |
+
|
11 |
+
## Code Style
|
12 |
+
- **Imports**: Group standard library, third-party, and local imports
|
13 |
+
- **Formatting**: Use Black/Ruff compatible formatting
|
14 |
+
- **Types**: Use type annotations for function parameters and return values
|
15 |
+
- **Naming**:
|
16 |
+
- Variables/functions: snake_case
|
17 |
+
- Classes: PascalCase
|
18 |
+
- Constants: UPPER_SNAKE_CASE
|
19 |
+
- **Error Handling**: Use try/except with specific exceptions
|
20 |
+
- **Documentation**: Docstrings for all public functions and classes
|
21 |
+
|
22 |
+
## Technologies
|
23 |
+
- Vector DB: Qdrant
|
24 |
+
- Embeddings: SentenceTransformers with sentence-transformers/static-retrieval-mrl-en-v1
|
25 |
+
- API: FastAPI
|
26 |
+
- Frontend: HTML/CSS/JavaScript with DaisyUI components
|
27 |
+
|
28 |
+
## MCP Integration
|
29 |
+
- Always call qdrant-code-search find tool when you are about to generate frontend code (HTML/CSS/JS)
|
30 |
+
- Store generated code snippets in qdrant-code-search store tool for future reference
|
31 |
+
|
32 |
+
## Qdrant
|
33 |
+
- Point IDs must be UUIDs passed as strings (e.g. `str(uuid.uuid4())`)
|
Dockerfile
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.10-slim
|
2 |
+
|
3 |
+
# Install system dependencies
|
4 |
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
5 |
+
curl \
|
6 |
+
&& rm -rf /var/lib/apt/lists/*
|
7 |
+
|
8 |
+
WORKDIR /app
|
9 |
+
|
10 |
+
# Install Poetry
|
11 |
+
RUN pip install poetry==1.8.3
|
12 |
+
|
13 |
+
# Copy poetry configuration files
|
14 |
+
COPY pyproject.toml poetry.lock poetry.toml* ./
|
15 |
+
|
16 |
+
# Configure poetry to not create a virtual environment
|
17 |
+
RUN poetry config virtualenvs.create false
|
18 |
+
|
19 |
+
# Install dependencies
|
20 |
+
RUN poetry install --no-dev --no-interaction --no-ansi
|
21 |
+
|
22 |
+
# Copy application code
|
23 |
+
COPY app ./app
|
24 |
+
|
25 |
+
# Expose port
|
26 |
+
EXPOSE 8000
|
27 |
+
|
28 |
+
# Set environment variables
|
29 |
+
ENV PYTHONPATH=/app
|
30 |
+
ENV QDRANT_URL=http://localhost:6333
|
31 |
+
# ENV QDRANT_API_KEY=your_api_key_here (uncomment and set if needed)
|
32 |
+
|
33 |
+
# Calculate the number of workers based on available CPUs
|
34 |
+
# Using the recommended formula: (2 * CPU cores) + 1
|
35 |
+
ENV WORKERS=4
|
36 |
+
|
37 |
+
# Create gunicorn config file
|
38 |
+
COPY gunicorn.conf.py ./
|
39 |
+
|
40 |
+
# Command to run the application with Gunicorn and Uvicorn workers
|
41 |
+
CMD ["gunicorn", "app.main:app", "-c", "gunicorn.conf.py"]
|
app/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# Initialize app package
|
app/api/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# Initialize API package
|
app/api/router.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import APIRouter
|
2 |
+
from app.api import video
|
3 |
+
|
4 |
+
router = APIRouter()
|
5 |
+
|
6 |
+
router.include_router(video.router, prefix="/video", tags=["video"])
|
app/api/video.py
ADDED
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import APIRouter, HTTPException, Query
|
2 |
+
from typing import List, Optional
|
3 |
+
from app.models.video import Video, SearchResult, VideoSegment
|
4 |
+
from app.services.video_service import (
|
5 |
+
process_video,
|
6 |
+
search_video_segments,
|
7 |
+
get_all_segments,
|
8 |
+
get_processed_videos,
|
9 |
+
get_video_by_id,
|
10 |
+
)
|
11 |
+
from pydantic import BaseModel
|
12 |
+
|
13 |
+
router = APIRouter()
|
14 |
+
|
15 |
+
|
16 |
+
class VideoRequest(BaseModel):
    """Request body for the ``/process`` endpoint."""

    # URL of the video to process; handed to ``extract_video_id`` and
    # ``process_video`` by the endpoint.
    url: str
|
18 |
+
|
19 |
+
|
20 |
+
class VideoResponse(BaseModel):
    """Result of a processing request: the video plus a processing-status flag."""

    # The video record, either freshly processed or loaded from storage.
    video: Video
    # True only when this request performed the processing itself.
    newly_processed: bool = False
|
25 |
+
|
26 |
+
|
27 |
+
@router.post("/process", response_model=VideoResponse)
async def process_video_endpoint(video_request: VideoRequest) -> VideoResponse:
    """Process a YouTube video to extract and store transcript segments.

    If the video has already been processed, the stored record is returned
    without reprocessing.

    Args:
        video_request: Request body carrying the video URL.

    Returns:
        The video record and a flag telling whether this call processed it.

    Raises:
        HTTPException: 400 when no video ID can be extracted from the URL,
            500 for any other failure.
    """
    import logging
    import traceback

    try:
        # ``extract_video_id`` is not part of the module-level imports.
        from app.services.video_service import extract_video_id

        try:
            video_id = extract_video_id(video_request.url)
        except ValueError as e:
            # A URL we cannot parse is a client error, not a server fault.
            logging.warning(f"Invalid video URL {video_request.url}: {str(e)}")
            raise HTTPException(status_code=400, detail=str(e)) from e

        # Short-circuit when the video is already stored and marked processed.
        existing_video = get_video_by_id(video_id)
        if existing_video is not None and existing_video.processed:
            logging.info(f"Video {video_id} already processed, returning existing data")
            return VideoResponse(video=existing_video, newly_processed=False)

        result = process_video(video_request.url)
        return VideoResponse(video=result, newly_processed=True)

    except HTTPException:
        # Already a well-formed HTTP error (the 400 above); pass it through.
        raise
    except Exception as e:
        logging.error(f"Error processing video URL {video_request.url}: {str(e)}")
        logging.error(traceback.format_exc())
        raise HTTPException(status_code=500, detail=str(e))
|
58 |
+
|
59 |
+
|
60 |
+
@router.get("/search")
async def search_video_endpoint(
    query: str = Query(..., description="Search query for video content"),
    video_id: Optional[str] = Query(
        None, description="Optional YouTube video ID to limit search"
    ),
    limit: int = Query(5, description="Maximum number of results to return"),
) -> List[SearchResult]:
    """Return video segments matching ``query``.

    A ``video_id`` of ``"undefined"`` or ``"null"`` (any casing) is treated
    as absent, so the search runs across all videos instead.

    Raises:
        HTTPException: 500 when the underlying search fails.
    """
    import logging

    # Placeholder strings are not real IDs; drop them and search globally.
    if video_id and video_id.lower() in ("undefined", "null"):
        logging.warning(f"Invalid video_id in search request: '{video_id}'")
        video_id = None

    try:
        return search_video_segments(query, video_id, limit)
    except Exception as exc:
        logging.error(
            f"Error searching for query '{query}' with video_id '{video_id}': {str(exc)}"
        )
        raise HTTPException(status_code=500, detail=str(exc))
|
84 |
+
|
85 |
+
|
86 |
+
@router.get("/segments/{video_id}")
async def get_segments_endpoint(video_id: str) -> List[VideoSegment]:
    """Return every stored segment of one video.

    Invalid IDs (empty, ``"undefined"``, ``"null"``) and videos without
    segments both yield an empty list rather than an error, so the frontend
    can handle them gracefully.

    Raises:
        HTTPException: 500 when the segments cannot be retrieved.
    """
    import logging

    if not video_id or video_id.lower() in ("undefined", "null"):
        logging.warning(f"Invalid video ID requested: '{video_id}'")
        return []

    try:
        found = get_all_segments(video_id)
        # Normalise any falsy result to an empty list instead of a 404.
        return found if found else []
    except Exception as exc:
        logging.error(f"Error getting segments for video {video_id}: {str(exc)}")
        raise HTTPException(
            status_code=500, detail=f"Could not retrieve video segments: {str(exc)}"
        )
|
108 |
+
|
109 |
+
|
110 |
+
@router.get("/recent")
async def get_recent_videos_endpoint(
    limit: int = Query(10, description="Maximum number of videos to return"),
) -> List[Video]:
    """Return up to ``limit`` processed videos via ``get_processed_videos``.

    Raises:
        HTTPException: 500 when the lookup raises.
    """
    import logging

    try:
        return get_processed_videos(limit=limit)
    except Exception as exc:
        logging.error(f"Error getting recent videos: {str(exc)}")
        raise HTTPException(
            status_code=500, detail=f"Could not retrieve recent videos: {str(exc)}"
        )
|
126 |
+
|
127 |
+
|
128 |
+
@router.get("/info/{video_id}")
async def get_video_info_endpoint(video_id: str) -> Video:
    """Return the stored metadata of one video.

    When the video is unknown, a placeholder ``Video`` titled
    ``"Video <video_id>"`` is returned instead of an error.

    Raises:
        HTTPException: 500 when the lookup raises.
    """
    import logging

    try:
        stored = get_video_by_id(video_id)
        if stored:
            return stored
        # Not in the database: hand back a minimal stand-in object.
        return Video(video_id=video_id, title=f"Video {video_id}")
    except Exception as exc:
        logging.error(f"Error getting video info for {video_id}: {str(exc)}")
        raise HTTPException(
            status_code=500, detail=f"Could not retrieve video info: {str(exc)}"
        )
|
app/main.py
ADDED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import FastAPI, Request
|
2 |
+
from fastapi.staticfiles import StaticFiles
|
3 |
+
from fastapi.templating import Jinja2Templates
|
4 |
+
from fastapi.responses import HTMLResponse, RedirectResponse
|
5 |
+
from fastapi.middleware.cors import CORSMiddleware
|
6 |
+
from app.api import router as api_router
|
7 |
+
from app.services.video_service import get_video_by_id
|
8 |
+
|
9 |
+
app = FastAPI(title="In-Video Search", docs_url=None, redoc_url=None, openapi_url=None)
|
10 |
+
|
11 |
+
# Enable CORS
|
12 |
+
app.add_middleware(
|
13 |
+
CORSMiddleware,
|
14 |
+
allow_origins=["*"], # Adjust this in production
|
15 |
+
allow_credentials=True,
|
16 |
+
allow_methods=["*"],
|
17 |
+
allow_headers=["*"],
|
18 |
+
)
|
19 |
+
|
20 |
+
# Mount static files
|
21 |
+
app.mount("/static", StaticFiles(directory="app/static"), name="static")
|
22 |
+
|
23 |
+
# Templates
|
24 |
+
templates = Jinja2Templates(directory="app/templates")
|
25 |
+
|
26 |
+
|
27 |
+
@app.get("/", response_class=HTMLResponse)
async def index(request: Request):
    """Render the landing page from ``index.html``."""
    context = {"request": request, "title": "In-Video Search"}
    return templates.TemplateResponse("index.html", context)
|
32 |
+
|
33 |
+
|
34 |
+
@app.get("/video/{video_id}", response_class=HTMLResponse)
async def video_page(request: Request, video_id: str):
    """Render the player page for one video.

    The page title is the stored video title when one exists, otherwise the
    generic ``"Video Player"``.
    """
    stored = get_video_by_id(video_id)
    page_title = stored.title if stored and stored.title else "Video Player"

    context = {"request": request, "title": page_title, "video_id": video_id}
    return templates.TemplateResponse("video.html", context)
|
48 |
+
|
49 |
+
|
50 |
+
@app.get("/watch")
async def watch_redirect(request: Request, v: str):
    """Redirect YouTube-style ``/watch?v=<id>`` URLs to ``/video/<id>``.

    Args:
        request: Incoming request (unused; kept for the existing signature).
        v: Video ID taken from the query string.

    Returns:
        A redirect response pointing at the video page.
    """
    from urllib.parse import quote

    # ``v`` is untrusted: escape it so characters such as ``?``, ``#`` or
    # ``/`` cannot alter the structure of the redirect target. Valid video
    # IDs (word characters and dashes) pass through unchanged.
    safe_id = quote(v, safe="")
    return RedirectResponse(url=f"/video/{safe_id}")
|
54 |
+
|
55 |
+
|
56 |
+
# Include API routers
|
57 |
+
app.include_router(api_router.router, prefix="/api")
|
58 |
+
|
59 |
+
|
60 |
+
if __name__ == "__main__":
|
61 |
+
import uvicorn
|
62 |
+
|
63 |
+
uvicorn.run("app.main:app", host="0.0.0.0", port=8000, reload=True)
|
app/models/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# Initialize models package
|
app/models/video.py
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pydantic import BaseModel, Field
|
2 |
+
from typing import Optional
|
3 |
+
|
4 |
+
|
5 |
+
class VideoSegment(BaseModel):
    """One piece of a video's transcript with its time span."""

    # All five fields are required.
    text: str = Field(..., description="Transcript text of the segment")
    start: float = Field(..., description="Start time in seconds")
    end: float = Field(..., description="End time in seconds")
    segment_id: str = Field(..., description="Unique identifier for the segment")
    video_id: str = Field(..., description="YouTube video ID this segment belongs to")
|
13 |
+
|
14 |
+
|
15 |
+
class Video(BaseModel):
    """A YouTube video and the metadata stored about it."""

    # Only the ID is required; every other field has a default.
    video_id: str = Field(..., description="YouTube video ID")
    title: Optional[str] = Field(None, description="Video title")
    description: Optional[str] = Field(None, description="Video description")
    channel: Optional[str] = Field(None, description="Channel name")
    processed: bool = Field(False, description="Whether the video has been processed")
    created_at: Optional[int] = Field(
        None, description="Unix timestamp (seconds since epoch) when the video was processed"
    )
|
26 |
+
|
27 |
+
|
28 |
+
class SearchResult(BaseModel):
    """A single search hit: a segment together with its similarity score."""

    score: float = Field(..., description="Similarity score")
    segment: VideoSegment = Field(..., description="The matching video segment")
|
app/services/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# Initialize services package
|
app/services/qdrant_service.py
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from qdrant_client import QdrantClient
|
3 |
+
import logging
|
4 |
+
|
5 |
+
|
6 |
+
def get_qdrant_client() -> QdrantClient:
    """
    Build a Qdrant client from the process environment.

    Environment variables:
    - QDRANT_URL: URL for Qdrant server (default: http://localhost:6333)
    - QDRANT_API_KEY: Optional API key for authentication

    A connectivity probe is run once; a failed probe is only logged and the
    client is returned regardless.

    Returns:
        QdrantClient: Configured Qdrant client
    """
    url = os.getenv("QDRANT_URL", "http://localhost:6333")
    api_key = os.getenv("QDRANT_API_KEY")

    # The API key is only passed along when one is configured.
    if not api_key:
        client = QdrantClient(location=url)
        logging.info(f"Connecting to Qdrant at {url}")
    else:
        client = QdrantClient(location=url, api_key=api_key)
        logging.info(f"Connecting to Qdrant at {url} with API key")

    # Probe the server; errors are reported but deliberately not raised.
    try:
        client.get_collections()
        logging.info(f"Successfully connected to Qdrant at {url}")
    except Exception as probe_error:
        logging.error(f"Failed to connect to Qdrant at {url}: {probe_error}")

    return client
|
38 |
+
|
39 |
+
|
40 |
+
# Initialize global client instance
|
41 |
+
qdrant_client = get_qdrant_client()
|
app/services/video_service.py
ADDED
@@ -0,0 +1,647 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import uuid
|
2 |
+
from typing import List, Dict, Any, Optional
|
3 |
+
import re
|
4 |
+
from datetime import datetime
|
5 |
+
from sentence_transformers import SentenceTransformer
|
6 |
+
from qdrant_client.http import models
|
7 |
+
from youtube_transcript_api import YouTubeTranscriptApi
|
8 |
+
import yt_dlp
|
9 |
+
from app.models.video import VideoSegment, Video, SearchResult
|
10 |
+
from app.services.qdrant_service import qdrant_client
|
11 |
+
|
12 |
+
# Initialize the sentence transformer model
|
13 |
+
model = SentenceTransformer("sentence-transformers/static-retrieval-mrl-en-v1")
|
14 |
+
|
15 |
+
# Collection names
|
16 |
+
COLLECTION_NAME = "video_segments"
|
17 |
+
PROCESSED_VIDEOS_COLLECTION = "processed_videos"
|
18 |
+
|
19 |
+
|
20 |
+
def _fetch_youtube_metadata(video_id: str, video: Optional[Video] = None) -> Video:
    """Fill in title, description and channel of a video using yt-dlp.

    Args:
        video_id: YouTube video ID used to build the watch URL.
        video: Video object to update; a new one is created when omitted.

    Returns:
        The updated Video. If the lookup fails and no title is set, the
        title falls back to ``"Video <video_id>"``.
    """
    import logging

    if not video:
        video = Video(video_id=video_id)

    # Metadata-only lookup: nothing is downloaded and yt-dlp stays silent.
    ydl_opts = {
        "skip_download": True,
        "quiet": True,
        "no_warnings": True,
        "extract_flat": True,
        "format": "best",
    }
    watch_url = f"https://www.youtube.com/watch?v={video_id}"

    # (Video attribute, yt-dlp info key) pairs copied over when non-empty.
    field_sources = (
        ("title", "title"),
        ("description", "description"),
        ("channel", "uploader"),
    )

    try:
        logging.info(f"Fetching metadata for video {video_id} from YouTube")

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(watch_url, download=False)

        for attribute, info_key in field_sources:
            value = info.get(info_key)
            if value:
                setattr(video, attribute, value)

        logging.info(
            f"Successfully retrieved video metadata: title='{video.title}', channel='{video.channel}'"
        )
    except Exception as meta_error:
        logging.warning(f"Could not fetch metadata from YouTube: {str(meta_error)}")
        if not video.title:
            video.title = f"Video {video_id}"

    return video
|
64 |
+
|
65 |
+
|
66 |
+
# Ensure collections exist
|
67 |
+
def ensure_collection_exists():
    """Create the Qdrant collections used by this module when they are missing.

    Both collections use cosine distance with the embedding model's vector
    size. The processed-videos collection additionally gets payload indexes
    on ``video_id`` (keyword) and ``created_at`` (integer, range-enabled).

    Raises:
        Exception: any error from Qdrant is logged with its traceback and
            re-raised.
    """
    import logging

    try:
        logging.info("Checking Qdrant collections")
        existing = qdrant_client.get_collections().collections
        collection_names = [entry.name for entry in existing]
        logging.info(f"Existing collections: {collection_names}")

        segments_missing = COLLECTION_NAME not in collection_names
        videos_missing = PROCESSED_VIDEOS_COLLECTION not in collection_names

        if segments_missing:
            logging.info(f"Creating collection: {COLLECTION_NAME}")
            vector_size = model.get_sentence_embedding_dimension()
            qdrant_client.create_collection(
                collection_name=COLLECTION_NAME,
                vectors_config=models.VectorParams(
                    size=vector_size,
                    distance=models.Distance.COSINE,
                ),
            )
            logging.info(
                f"Successfully created {COLLECTION_NAME} collection with vector size {vector_size}"
            )

        if videos_missing:
            logging.info(f"Creating collection: {PROCESSED_VIDEOS_COLLECTION}")
            vector_size = model.get_sentence_embedding_dimension()
            qdrant_client.create_collection(
                collection_name=PROCESSED_VIDEOS_COLLECTION,
                vectors_config=models.VectorParams(
                    size=vector_size,
                    distance=models.Distance.COSINE,
                ),
            )
            # Keyword index on the video ID payload field.
            qdrant_client.create_payload_index(
                collection_name=PROCESSED_VIDEOS_COLLECTION,
                field_name="video_id",
                field_schema=models.PayloadSchemaType.KEYWORD,
            )
            # Range-enabled integer index on the creation timestamp.
            qdrant_client.create_payload_index(
                collection_name=PROCESSED_VIDEOS_COLLECTION,
                field_name="created_at",
                field_schema=models.IntegerIndexParams(
                    type=models.IntegerIndexType.INTEGER,
                    range=True,
                ),
            )
            logging.info(
                f"Successfully created {PROCESSED_VIDEOS_COLLECTION} collection with vector size {vector_size}"
            )
    except Exception as setup_error:
        import traceback

        logging.error(f"Error ensuring collections exist: {str(setup_error)}")
        logging.error(traceback.format_exc())
        raise
|
125 |
+
|
126 |
+
|
127 |
+
def get_embeddings(text: str) -> List[float]:
    """Encode ``text`` with the module-level model and return the vector as a list."""
    encoded = model.encode(text)
    return encoded.tolist()
|
130 |
+
|
131 |
+
|
132 |
+
def extract_video_id(youtube_url: str) -> str:
    """Extract the YouTube video ID from a URL or a bare ID.

    Recognised inputs (surrounding whitespace is ignored):
    - ``youtube.com/watch?...v=ID`` with ``v`` anywhere in the query string
    - ``youtu.be/ID``
    - ``youtube.com/embed/ID``, ``/v/ID``, ``/shorts/ID`` and ``/live/ID``
    - a bare ID made only of word characters and dashes

    Args:
        youtube_url: URL or video ID supplied by the caller.

    Returns:
        The extracted video ID.

    Raises:
        ValueError: If no video ID can be found in the input.
    """
    import logging

    logging.info(f"Extracting video ID from URL: {youtube_url}")

    # Pasted URLs often carry stray spaces or a trailing newline.
    candidate = youtube_url.strip()

    patterns = [
        # ``v`` may be preceded by other query parameters (e.g. feature=share).
        r"youtube\.com/watch\?(?:[^#\s]*&)?v=([\w-]+)",
        r"youtu\.be/([\w-]+)",
        r"youtube\.com/(?:embed|v|shorts|live)/([\w-]+)",
    ]

    for pattern in patterns:
        match = re.search(pattern, candidate)
        if match:
            video_id = match.group(1)
            logging.info(f"Extracted video ID: {video_id}")
            return video_id

    # No URL pattern matched: accept the input itself if it looks like an ID.
    # fullmatch (unlike ``$``) does not tolerate a trailing newline.
    if re.fullmatch(r"[\w-]+", candidate):
        logging.info(f"Using direct video ID: {candidate}")
        return candidate

    logging.error(f"Failed to extract video ID from URL: {youtube_url}")
    raise ValueError(f"Could not extract video ID from URL: {youtube_url}")
|
159 |
+
|
160 |
+
|
161 |
+
def get_video_transcript(video_id: str) -> List[Dict[str, Any]]:
    """
    Fetch a transcript for a YouTube video, preferring English.

    Candidates are tried in this order and the first successful fetch wins:
    1. The English transcript
    2. Each translatable non-English transcript, translated to English
    3. Each non-English transcript in its original language

    Raises:
        ValueError: if the transcripts cannot be listed or none can be fetched.
    """
    import logging
    import traceback

    try:
        available = YouTubeTranscriptApi.list_transcripts(video_id)

        # Split the listing into the English entry and everything else.
        english = None
        non_english = []
        for candidate in available:
            if candidate.language_code == "en":
                english = candidate
            else:
                non_english.append(candidate)

        # Step 1: English as-is.
        if english:
            try:
                logging.info("Found English transcript, using it directly")
                return english.fetch()
            except Exception as fetch_error:
                logging.warning(f"Failed to fetch English transcript: {str(fetch_error)}")

        # Step 2: translate another language to English.
        for candidate in non_english:
            if not candidate.is_translatable:
                continue
            try:
                logging.info(
                    f"Trying to translate {candidate.language_code} transcript to English"
                )
                translated = candidate.translate("en").fetch()
                logging.info(
                    f"Successfully translated {candidate.language_code} transcript to English"
                )
                return translated
            except Exception as translate_error:
                logging.warning(
                    f"Failed to translate {candidate.language_code} transcript: {str(translate_error)}"
                )

        # Step 3: settle for any transcript in its original language.
        for candidate in non_english:
            try:
                logging.info(
                    f"Using non-translated {candidate.language_code} transcript"
                )
                return candidate.fetch()
            except Exception as fetch_error:
                logging.warning(
                    f"Failed to fetch {candidate.language_code} transcript: {str(fetch_error)}"
                )

        # Every candidate failed.
        available_langs = [entry.language_code for entry in available]
        raise ValueError(
            f"No usable transcripts found for video {video_id}. Available languages: {available_langs}"
        )

    except Exception as e:
        logging.error(f"Transcript API error for video {video_id}: {str(e)}")
        logging.error(traceback.format_exc())
        raise ValueError(f"Could not get transcript for video {video_id}: {str(e)}")
|
234 |
+
|
235 |
+
|
236 |
+
def store_processed_video(video: Video) -> bool:
    """Store a processed video as a point in the processed-videos collection.

    The point's vector is the embedding of ``"video_<video_id>"`` and its
    payload is the dumped Video model. Each call writes a new point with a
    random UUID.

    Args:
        video: The video record to persist.

    Returns:
        True when the upsert succeeded, False when it raised (the error is
        logged, not propagated).
    """
    import logging
    import traceback

    try:
        # The collection requires a vector, so embed a string derived from the ID.
        vector = get_embeddings(f"video_{video.video_id}")
        payload = video.model_dump()

        qdrant_client.upsert(
            collection_name=PROCESSED_VIDEOS_COLLECTION,
            points=[
                models.PointStruct(
                    id=uuid.uuid4().hex,
                    vector=vector,
                    payload=payload,
                ),
            ],
        )
        return True
    except Exception as e:
        # Report through logging (like the rest of this module) and keep the
        # traceback; callers only see the boolean result.
        logging.error(f"Error storing processed video: {e}")
        logging.error(traceback.format_exc())
        return False
|
260 |
+
|
261 |
+
|
262 |
+
def get_processed_videos(limit: int = 10) -> List[Video]:
    """Get recently processed videos ordered by creation time, newest first.

    Args:
        limit: Maximum number of videos to return.

    Returns:
        Up to ``limit`` videos, or an empty list when the lookup fails (the
        error is logged, not propagated).
    """
    import logging
    import traceback

    try:
        # scroll() returns (points, next_page_offset); only the points matter.
        points, _next_offset = qdrant_client.scroll(
            collection_name=PROCESSED_VIDEOS_COLLECTION,
            limit=limit,
            with_payload=True,
            order_by=models.OrderBy(key="created_at", direction=models.Direction.DESC),
        )

        videos = [Video(**point.payload) for point in points]

        # ``created_at`` is an optional int; fall back to 0 so records without
        # a timestamp sort last and ints are never compared with strings.
        videos.sort(key=lambda video: video.created_at or 0, reverse=True)

        return videos[:limit]
    except Exception as e:
        logging.error(f"Error getting processed videos: {e}")
        logging.error(traceback.format_exc())
        return []
|
287 |
+
|
288 |
+
|
289 |
+
def _normalize_transcript(transcript) -> list:
    """Convert transcript items into plain dicts with text/start/duration keys."""
    import logging

    normalized = []
    for position, item in enumerate(transcript):
        if (
            isinstance(item, dict)
            and "text" in item
            and "start" in item
            and "duration" in item
        ):
            # Original dictionary format
            normalized.append(
                {
                    "text": item["text"],
                    "start": item["start"],
                    "duration": item["duration"],
                }
            )
        elif (
            hasattr(item, "text")
            and hasattr(item, "start")
            and hasattr(item, "duration")
        ):
            # Object with attributes
            normalized.append(
                {"text": item.text, "start": item.start, "duration": item.duration}
            )
        else:
            # Unknown format: keep the text and approximate the timing from the
            # item's position (5 seconds per item).
            logging.warning(
                f"Encountered unknown transcript item format: {type(item)}"
            )
            try:
                normalized.append(
                    {
                        "text": str(item),
                        "start": float(position * 5),
                        "duration": 5.0,
                    }
                )
            except Exception as e:
                logging.error(f"Failed to normalize transcript item: {str(e)}")
                continue
    return normalized


def _window_transcript(entries, window_seconds=30.0, overlap_seconds=10.0) -> list:
    """Group normalized entries into overlapping windows.

    Returns a list of ``(first_index, start, end, text)`` tuples. Each window
    collects entries until it spans ``window_seconds``; the next window starts
    at the first entry that begins at least ``window_seconds - overlap_seconds``
    after the current window's start.
    """
    windows = []
    total = len(entries)
    step_seconds = window_seconds - overlap_seconds
    i = 0
    while i < total:
        start_time = entries[i]["start"]
        end_time = start_time
        texts = []
        cursor = i

        while cursor < total and end_time - start_time < window_seconds:
            texts.append(entries[cursor]["text"])
            end_time = entries[cursor]["start"] + entries[cursor]["duration"]
            cursor += 1

        if texts:  # Only emit a window if it has text
            windows.append((i, start_time, end_time, " ".join(texts)))

        # Skip entries that fall inside the non-overlapping part of this
        # window. A while loop is required here: reassigning the loop variable
        # of a ``for ... in range`` has no effect on the next iteration.
        if i + 1 < total and entries[i + 1]["start"] < end_time - overlap_seconds:
            while i + 1 < total and entries[i + 1]["start"] < start_time + step_seconds:
                i += 1
        i += 1
    return windows


def process_video(youtube_url: str) -> Video:
    """Process a YouTube video: fetch its transcript, segment it and store it.

    Steps: extract the video ID, return early if the video is already stored
    as processed, fetch metadata (best effort) and the transcript, split the
    transcript into ~30-second windows with ~10 seconds of overlap, store each
    window as a segment, then store the video record marked as processed.

    Args:
        youtube_url: URL of the video to process.

    Returns:
        The processed ``Video`` (the existing one if it was already processed).

    Raises:
        Exception: Any error from ID extraction, transcript retrieval or
            segment preparation is logged and re-raised unchanged.
    """
    import logging
    import time

    logging.info(f"Processing video URL: {youtube_url}")
    transcript = None
    video_id = None

    # Extract video ID and get transcript
    try:
        video_id = extract_video_id(youtube_url)
        logging.info(f"Successfully extracted video ID: {video_id}")

        # Check if video has already been processed
        existing_video = get_video_by_id(video_id)
        if existing_video and existing_video.processed:
            logging.info(
                f"Video {video_id} has already been processed. Skipping processing."
            )
            return existing_video

        # time.time() is the true Unix epoch. datetime.utcnow().timestamp()
        # interprets the naive value as local time and is therefore skewed by
        # the host's UTC offset.
        current_time = int(time.time())
        video = Video(video_id=video_id, created_at=current_time)

        # Metadata is optional: continue with processing if the fetch fails.
        try:
            video = _fetch_youtube_metadata(video_id, video)
        except Exception as meta_error:
            logging.warning(
                f"Error fetching YouTube metadata during processing: {str(meta_error)}"
            )

        logging.info(f"Fetching transcript for video ID: {video_id}")
        transcript = get_video_transcript(video_id)
        logging.info(
            f"Successfully retrieved transcript with {len(transcript)} entries"
        )

        # Without a real title, fall back to the first words of the transcript.
        if (
            (not video.title or video.title == f"Video {video_id}")
            and transcript
            and len(transcript) > 0
        ):
            try:
                first_item = transcript[0]
                if isinstance(first_item, dict) and "text" in first_item:
                    video.title = f"{first_item['text'][:30]}..."
                elif hasattr(first_item, "text"):
                    video.title = f"{first_item.text[:30]}..."
                else:
                    first_item_str = str(first_item)[:30]
                    video.title = f"{first_item_str}..."
                logging.info(f"Set video title from transcript: {video.title}")
            except Exception as title_error:
                logging.warning(
                    f"Could not set title from transcript: {str(title_error)}"
                )
    except Exception as e:
        logging.exception(f"Error in initial video processing: {str(e)}")
        raise

    # Process transcript into segments
    try:
        logging.info(f"Processing {len(transcript)} transcript entries into segments")
        normalized_transcript = _normalize_transcript(transcript)

        segments = [
            VideoSegment(
                text=text,
                start=start_time,
                end=end_time,
                # The index of the window's first entry keeps IDs unique per video.
                segment_id=f"{video_id}_{first_index}",
                video_id=video_id,
            )
            for first_index, start_time, end_time, text in _window_transcript(
                normalized_transcript
            )
        ]
        logging.info(f"Created {len(segments)} segments from transcript")

        # Store segments in Qdrant
        logging.info("Ensuring Qdrant collections exist")
        ensure_collection_exists()

        logging.info(f"Storing {len(segments)} segments in Qdrant")
        failed = 0
        for segment in segments:
            if not store_segment(segment):
                failed += 1
        if failed:
            # Storage stays best-effort, but the loss is no longer silent.
            logging.warning(
                f"Failed to store {failed} of {len(segments)} segments for video {video_id}"
            )
    except Exception as e:
        logging.exception(f"Error processing transcript segments: {str(e)}")
        raise

    # Mark video as processed and store it
    try:
        logging.info(f"Marking video {video_id} as processed")
        video.processed = True

        logging.info("Storing processed video in Qdrant")
        store_result = store_processed_video(video)
        if store_result:
            logging.info(f"Successfully stored processed video: {video_id}")
        else:
            logging.warning(f"Failed to store processed video in Qdrant: {video_id}")

        return video
    except Exception as e:
        logging.exception(f"Error storing processed video: {str(e)}")
        raise
|
492 |
+
|
493 |
+
|
494 |
+
def store_segment(segment: VideoSegment) -> bool:
    """Embed a transcript segment and upsert it into the segments collection.

    The point ID is derived from ``segment.segment_id`` so that storing the
    same segment again (for example when a video is retried after a partial
    failure) overwrites the earlier point instead of duplicating it. A random
    ID is used only when the segment has no ``segment_id``.

    Args:
        segment: The segment to store; ``segment.model_dump()`` is the payload
            and ``segment.text`` is what gets embedded.

    Returns:
        True when the upsert call completed, False when any step raised
        (the error is logged with its traceback).
    """
    import logging

    try:
        logging.debug(f"Getting embeddings for segment {segment.segment_id}")
        vector = get_embeddings(segment.text)

        payload = segment.model_dump()

        if segment.segment_id:
            point_id = str(
                uuid.uuid5(uuid.NAMESPACE_URL, f"segment_{segment.segment_id}")
            )
        else:
            # No stable key available: avoid collapsing all such segments
            # onto a single point.
            point_id = uuid.uuid4().hex

        logging.debug(
            f"Storing segment {segment.segment_id} in Qdrant with point ID {point_id}"
        )
        qdrant_client.upsert(
            collection_name=COLLECTION_NAME,
            points=[
                models.PointStruct(
                    id=point_id,
                    vector=vector,
                    payload=payload,
                ),
            ],
        )
        return True
    except Exception as e:
        logging.exception(f"Error storing segment {segment.segment_id}: {str(e)}")
        return False
|
528 |
+
|
529 |
+
|
530 |
+
def search_video_segments(
    query: str, video_id: Optional[str] = None, limit: int = 5
) -> List[SearchResult]:
    """Run a vector search over stored segments for ``query``.

    Args:
        query: Free-text query; it is embedded with ``get_embeddings``.
        video_id: When given (truthy), restrict matches to this video.
        limit: Maximum number of hits requested from Qdrant.

    Returns:
        One ``SearchResult`` per hit, in the order Qdrant returned them.
    """
    embedded_query = get_embeddings(query)

    # Only constrain the search when a video was specified.
    restriction = (
        models.Filter(
            must=[
                models.FieldCondition(
                    key="video_id",
                    match=models.MatchValue(value=video_id),
                ),
            ],
        )
        if video_id
        else None
    )

    hits = qdrant_client.search(
        collection_name=COLLECTION_NAME,
        query_vector=embedded_query,
        limit=limit,
        query_filter=restriction,
    )

    # Each payload is a dumped VideoSegment; pair it with its score.
    return [
        SearchResult(score=hit.score, segment=VideoSegment(**hit.payload))
        for hit in hits
    ]
|
571 |
+
|
572 |
+
|
573 |
+
def get_all_segments(video_id: str) -> List[VideoSegment]:
    """Return every stored segment of a video, ordered by start time.

    The collection is read page by page, following the offset returned by each
    ``scroll`` call, so videos with more segments than one page are not
    silently truncated.

    Args:
        video_id: ID of the video whose segments are wanted.

    Returns:
        All matching segments sorted by ``start`` ascending.
    """
    page_size = 1000

    filter_param = models.Filter(
        must=[
            models.FieldCondition(
                key="video_id",
                match=models.MatchValue(value=video_id),
            ),
        ],
    )

    segments = []
    offset = None
    while True:
        # scroll returns (points, next_page_offset); the offset is None on the
        # last page.
        points, offset = qdrant_client.scroll(
            collection_name=COLLECTION_NAME,
            scroll_filter=filter_param,
            limit=page_size,
            offset=offset,
            with_payload=True,
        )
        segments.extend(VideoSegment(**point.payload) for point in points)

        if offset is None or not points:
            break

    segments.sort(key=lambda segment: segment.start)
    return segments
|
603 |
+
|
604 |
+
|
605 |
+
def get_video_by_id(video_id: str) -> Optional[Video]:
    """Look up a video by ID, falling back to YouTube metadata.

    If the video is stored in the processed-videos collection, the stored
    record is returned; when it has no real title, a metadata refresh is
    attempted, and a failed refresh does not discard the stored record. If the
    video is not stored, a new ``Video`` populated via
    ``_fetch_youtube_metadata`` is returned.

    Args:
        video_id: ID of the video to look up.

    Returns:
        A ``Video``. On any unexpected error a placeholder with only the ID
        and a ``"Video <id>"`` title is returned instead of raising.
    """
    import logging

    try:
        filter_param = models.Filter(
            must=[
                models.FieldCondition(
                    key="video_id",
                    match=models.MatchValue(value=video_id),
                ),
            ],
        )

        # Search in the processed_videos collection
        scroll_result = qdrant_client.scroll(
            collection_name=PROCESSED_VIDEOS_COLLECTION,
            scroll_filter=filter_param,
            limit=1,  # We only need one result
            with_payload=True,
        )

        points = scroll_result[0]
        if points:
            video = Video(**points[0].payload)

            # Stored without a real title: try to fill it in from YouTube.
            if not video.title or video.title == f"Video {video_id}":
                try:
                    video = _fetch_youtube_metadata(video_id, video)
                except Exception as meta_error:
                    # Keep the stored record. Falling through to the outer
                    # handler would return an unprocessed placeholder and make
                    # process_video reprocess an already processed video.
                    logging.warning(
                        f"Could not refresh metadata for video {video_id}: {str(meta_error)}"
                    )

            return video

        # If video not found in database, fetch basic metadata from YouTube
        logging.info(f"Video {video_id} not found in database, fetching from YouTube")
        video = Video(video_id=video_id)
        return _fetch_youtube_metadata(video_id, video)

    except Exception as e:
        logging.error(f"Error getting video by ID {video_id}: {str(e)}")
        # Return a basic video object with just the ID
        return Video(video_id=video_id, title=f"Video {video_id}")
|
app/static/css/style.css
ADDED
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/* Custom styles */

/* Video Carousel: each item snaps to the centre of the scroll container */
.carousel-item {
    scroll-snap-align: center;
}

/* Make carousel items responsive but maintain minimum width.
   The two ranges must not overlap: previously both queries matched at
   exactly 640px. */
@media (max-width: 639px) {
    .carousel-item {
        min-width: 200px;
    }
}

@media (min-width: 640px) {
    .carousel-item {
        min-width: 250px;
    }
}

/* Carousel container - don't let arrows overlap content */
.carousel {
    overflow-x: hidden;
    scrollbar-width: none; /* Hide scrollbar for Firefox */
    -ms-overflow-style: none; /* Hide scrollbar for IE/Edge */
}

.carousel::-webkit-scrollbar {
    display: none; /* Hide scrollbar for Chrome/Safari/Opera */
}

/* Navigation arrow styles */
.btn-circle.btn-disabled {
    opacity: 0.5;
    cursor: not-allowed;
}

/* Video card styling - ensure proper structure:
   thumbnail keeps its size, body stretches to fill the card */
.carousel-item .card {
    display: flex;
    flex-direction: column;
    height: 100%;
}

.carousel-item .card figure {
    width: 100%;
    flex: 0 0 auto;
}

.carousel-item .card .card-body {
    flex: 1 0 auto;
    display: flex;
    flex-direction: column;
}

/* Transcript container: scrolls vertically once it exceeds 500px */
.transcript-container {
    max-height: 500px;
    overflow-y: auto;
    padding-right: 1rem;
}

/* Transcript segments */
.transcript-segment {
    padding: 0.625rem;
    margin-bottom: 0.5rem;
    border-radius: 0.5rem;
    cursor: pointer;
    transition: all 0.2s ease;
    border: 1px solid transparent;
    background-color: var(--base-200, #f3f4f6);
}

.transcript-segment:hover {
    background-color: var(--base-300, #e5e7eb);
    transform: translateY(-1px);
    box-shadow: 0 2px 5px rgba(0, 0, 0, 0.05);
}

.transcript-segment.highlight {
    background-color: var(--primary-focus, rgba(59, 130, 246, 0.2));
    border-left: 3px solid var(--primary, #3b82f6);
    box-shadow: 0 2px 8px rgba(0, 0, 0, 0.08);
}

.transcript-segment.hidden-segment {
    display: none;
}

/* Timestamp */
.timestamp {
    display: inline-block;
    background-color: var(--neutral, #e5e7eb);
    padding: 0.125rem 0.5rem;
    border-radius: 9999px;
    font-size: 0.75rem;
    font-weight: bold;
    color: var(--neutral-content, #4b5563);
    margin-right: 0.5rem;
}

/* Score badge */
.score-badge {
    display: inline-block;
    background-color: var(--primary, #3b82f6);
    color: var(--primary-content, white);
    border-radius: 9999px;
    padding: 0.125rem 0.5rem;
    font-size: 0.75rem;
    margin-left: 0.5rem;
}

/* Search result */
.search-result {
    transition: all 0.2s ease;
}

.search-result:hover {
    transform: translateY(-2px);
}

/* Metadata tags */
.metadata-tags {
    display: flex;
    flex-wrap: wrap;
    gap: 0.25rem;
    margin-top: 0.5rem;
}

.metadata-tag {
    font-size: 0.7rem;
    padding: 0.1rem 0.4rem;
    border-radius: 9999px;
    background-color: var(--accent, #d8b4fe);
    color: var(--accent-content, #581c87);
    white-space: nowrap;
}
|
app/static/js/index.js
ADDED
@@ -0,0 +1,268 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
// Index page functionality
|
2 |
+
document.addEventListener('DOMContentLoaded', () => {
|
3 |
+
const youtubeUrlInput = document.getElementById('youtube-url');
|
4 |
+
const processButton = document.getElementById('process-button');
|
5 |
+
const processStatus = document.getElementById('process-status');
|
6 |
+
const processingIndicator = document.getElementById('processing');
|
7 |
+
const recentlyProcessedCard = document.getElementById('recently-processed');
|
8 |
+
const videoListContainer = document.getElementById('video-list');
|
9 |
+
|
10 |
+
// Example video buttons
|
11 |
+
const exampleButtons = document.querySelectorAll('.example-video');
|
12 |
+
|
13 |
+
// Process button click handler
|
14 |
+
processButton.addEventListener('click', () => processVideo());
|
15 |
+
|
16 |
+
// Enter key in input field
|
17 |
+
youtubeUrlInput.addEventListener('keypress', (e) => {
|
18 |
+
if (e.key === 'Enter') processVideo();
|
19 |
+
});
|
20 |
+
|
21 |
+
// Example video buttons
|
22 |
+
exampleButtons.forEach(button => {
|
23 |
+
button.addEventListener('click', () => {
|
24 |
+
youtubeUrlInput.value = button.dataset.url;
|
25 |
+
processVideo();
|
26 |
+
});
|
27 |
+
});
|
28 |
+
|
29 |
+
// Process video function
|
30 |
+
/**
 * Validate the URL in the input field, ask the server to process the video
 * and render the outcome (spinner, slow-processing notice, success or error)
 * into the status element. On success the recent-video lists are refreshed.
 */
function processVideo() {
    const youtubeUrl = youtubeUrlInput.value.trim();
    if (!youtubeUrl) {
        processStatus.innerHTML = '<div class="alert alert-warning">Please enter a YouTube URL</div>';
        return;
    }

    // Client-side sanity check only; the server extracts the ID again.
    const videoId = extractVideoId(youtubeUrl);
    if (!videoId) {
        processStatus.innerHTML = '<div class="alert alert-error">Invalid YouTube URL</div>';
        return;
    }

    // Show loading indicator with spinner and text
    processStatus.innerHTML = `
        <div class="flex items-center justify-center my-4">
            <span class="loading loading-spinner loading-md text-primary"></span>
            <span class="ml-2">Processing video... This may take a few moments</span>
        </div>
    `;

    // After 20 seconds, replace the spinner with a "still working" notice.
    // This does not abort the request.
    const timeoutId = setTimeout(() => {
        processStatus.innerHTML = `
            <div class="alert alert-warning">
                <svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24">
                    <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 9v2m0 4h.01m-6.938 4h13.856c1.54 0 2.502-1.667 1.732-3L13.732 4c-.77-1.333-2.694-1.333-3.464 0L3.34 16c-.77 1.333.192 3 1.732 3z" />
                </svg>
                <span>Processing is taking longer than expected. Please wait...</span>
            </div>
        `;
    }, 20000); // 20 seconds

    // Send request to process the video
    fetch('/api/video/process', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json'
        },
        body: JSON.stringify({ url: youtubeUrl })
    })
        .then(response => {
            if (!response.ok) {
                throw new Error('Failed to process video');
            }
            return response.json();
        })
        .then(data => {
            clearTimeout(timeoutId);

            // Handles both API formats: { video: {...}, newly_processed } and a bare video object.
            const processedId = data.video ? data.video.video_id : data.video_id;
            const isNewlyProcessed = data.newly_processed !== undefined ? data.newly_processed : true;

            if (!processedId) {
                throw new Error('Invalid response: Missing video ID');
            }

            // Log for debugging
            console.log('Process response:', {videoId: processedId, isNewlyProcessed, data});

            // The ID comes from the server response, so encode it before
            // placing it in a URL inside generated markup.
            const videoHref = `/video/${encodeURIComponent(processedId)}`;

            // Show success message
            processStatus.innerHTML = `
                <div role="alert" class="alert alert-success">
                    <svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24">
                        <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 12l2 2 4-4m6 2a9 9 0 11-18 0 9 9 0 0118 0z" />
                    </svg>
                    <span>${isNewlyProcessed ? 'Video processed successfully!' : 'Video was already processed!'}</span>
                    <div>
                        <a href="${videoHref}" class="btn btn-sm btn-primary">
                            <svg xmlns="http://www.w3.org/2000/svg" class="h-4 w-4 mr-1" fill="none" viewBox="0 0 24 24" stroke="currentColor">
                                <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M14.752 11.168l-3.197-2.132A1 1 0 0010 9.87v4.263a1 1 0 001.555.832l3.197-2.132a1 1 0 000-1.664z" />
                                <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
                            </svg>
                            Open Video
                        </a>
                    </div>
                </div>
            `;

            // Update recent videos lists
            displayRecentVideos();
            loadFooterRecentVideos(); // Update footer videos as well
        })
        .catch(error => {
            clearTimeout(timeoutId);

            // Show error message
            console.error('Process error:', error);
            processStatus.innerHTML = handleError(error);
        });
}
|
128 |
+
|
129 |
+
// Display recently processed videos
|
130 |
+
/**
 * Fetch the most recently processed videos and render them as a carousel.
 *
 * Titles and IDs come from the server (ultimately from YouTube metadata), so
 * they are escaped/encoded before being placed into generated markup.
 * The function is called again after every processed video; navigation
 * handlers are therefore assigned (not added) so they never stack up.
 */
function displayRecentVideos() {
    const escapeHtml = (value) => String(value)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');

    // Show loading state
    recentlyProcessedCard.classList.remove('hidden');
    videoListContainer.innerHTML = `
        <div class="flex justify-center items-center p-4">
            <span class="loading loading-spinner loading-md"></span>
            <span class="ml-2">Loading recent videos...</span>
        </div>
    `;

    const carouselPrev = document.getElementById('carousel-prev');
    const carouselNext = document.getElementById('carousel-next');

    const hideArrows = () => {
        carouselPrev.classList.add('hidden');
        carouselNext.classList.add('hidden');
    };

    // Fetch recent videos from server
    fetch('/api/video/recent?limit=5')
        .then(response => {
            if (!response.ok) {
                throw new Error('Failed to fetch recent videos');
            }
            return response.json();
        })
        .then(videos => {
            if (!videos || videos.length === 0) {
                recentlyProcessedCard.classList.add('hidden');
                hideArrows();
                return;
            }

            // Limit to 5 videos
            const limitedVideos = videos.slice(0, 5);

            // Generate carousel items
            videoListContainer.innerHTML = limitedVideos.map((video, index) => {
                // created_at is a Unix timestamp in seconds
                const formattedDate = video.created_at
                    ? new Date(video.created_at * 1000).toLocaleDateString()
                    : '';
                const videoTitle = escapeHtml(video.title || `Video ${video.video_id}`);
                const safeId = encodeURIComponent(video.video_id);

                return `
                    <div id="video-${index}" class="carousel-item">
                        <a href="/video/${safeId}" class="card bg-base-100 shadow-sm hover:shadow-md transition-all w-64 md:w-72 flex flex-col">
                            <figure class="w-full h-36 overflow-hidden">
                                <img src="https://img.youtube.com/vi/${safeId}/mqdefault.jpg" alt="Thumbnail" class="w-full h-full object-cover">
                            </figure>
                            <div class="card-body p-3">
                                <h3 class="card-title text-sm line-clamp-2">${videoTitle}</h3>
                                <div class="text-xs opacity-70">${escapeHtml(formattedDate)}</div>
                            </div>
                        </a>
                    </div>
                `;
            }).join('');

            if (limitedVideos.length <= 1) {
                // Hide arrows for single video
                hideArrows();
                return;
            }

            // Setup navigation arrows
            let currentIndex = 0;
            const maxIndex = limitedVideos.length - 1;

            carouselPrev.classList.remove('hidden');
            carouselNext.classList.remove('hidden');

            const prevButton = carouselPrev.querySelector('button');
            const nextButton = carouselNext.querySelector('button');

            const updateButtonStates = () => {
                prevButton.classList.toggle('btn-disabled', currentIndex === 0);
                nextButton.classList.toggle('btn-disabled', currentIndex === maxIndex);
            };

            const showCurrent = () => {
                document.getElementById(`video-${currentIndex}`).scrollIntoView({
                    behavior: 'smooth',
                    block: 'nearest',
                    inline: 'center'
                });
                updateButtonStates();
            };

            // Reset both buttons: a previous render may have left "next" disabled.
            updateButtonStates();

            // Assign rather than addEventListener: this function runs on every
            // refresh, and stacked listeners would move several items per click.
            prevButton.onclick = () => {
                if (currentIndex > 0) {
                    currentIndex--;
                    showCurrent();
                }
            };

            nextButton.onclick = () => {
                if (currentIndex < maxIndex) {
                    currentIndex++;
                    showCurrent();
                }
            };
        })
        .catch(error => {
            console.error('Error fetching recent videos:', error);
            videoListContainer.innerHTML = `
                <div class="alert alert-error">
                    <svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24">
                        <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 14l2-2m0 0l2-2m-2 2l-2-2m2 2l2 2m7-2a9 9 0 11-18 0 9 9 0 0118 0z" />
                    </svg>
                    <span>Failed to load recent videos</span>
                </div>
            `;
            hideArrows();
        });
}
|
265 |
+
|
266 |
+
// Display recent videos on page load
|
267 |
+
displayRecentVideos();
|
268 |
+
});
|
app/static/js/main.js
ADDED
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
// Common functionality
|
2 |
+
|
3 |
+
// Initialize on page load
|
4 |
+
document.addEventListener('DOMContentLoaded', () => {
    // Sets the active theme on the <html> element.
    const applyTheme = (name) => {
        document.documentElement.setAttribute('data-theme', name);
    };

    // Populate the footer's recent-videos list as soon as the DOM is ready.
    loadFooterRecentVideos();

    // Theme picker: clicking an entry applies its theme and remembers it.
    for (const entry of document.querySelectorAll('.theme-item')) {
        entry.addEventListener('click', () => {
            const chosen = entry.dataset.theme;
            applyTheme(chosen);
            localStorage.setItem('theme', chosen);
        });
    }

    // Restore the theme remembered from an earlier visit, if there is one.
    const remembered = localStorage.getItem('theme');
    if (remembered) {
        applyTheme(remembered);
    }
});
|
24 |
+
|
25 |
+
// Format seconds to MM:SS format
|
26 |
+
/**
 * Format a duration in seconds as zero-padded "MM:SS".
 * Minutes are not wrapped at 60 (3700 seconds -> "61:40").
 * Negative, NaN or otherwise non-finite input is treated as 0 so the UI
 * never shows strings such as "NaN:NaN".
 *
 * @param {number} seconds - Duration in seconds; fractions are truncated.
 * @returns {string} The formatted time.
 */
function formatTime(seconds) {
    const numeric = Number(seconds);
    const whole = Number.isFinite(numeric) && numeric > 0 ? Math.floor(numeric) : 0;
    const pad = (part) => part.toString().padStart(2, '0');
    return `${pad(Math.floor(whole / 60))}:${pad(whole % 60)}`;
}
|
31 |
+
|
32 |
+
// Error handling function
|
33 |
+
/**
 * Log an error and build the HTML for an error alert with a "Retry" button
 * (which reloads the page). Callers assign the result to innerHTML, so the
 * message is HTML-escaped before being embedded.
 *
 * @param {*} error - Usually an Error; anything without a `message` falls
 *     back to a generic text.
 * @returns {string} Alert markup.
 */
function handleError(error) {
    console.error('Error:', error);

    const rawMessage = (error && error.message) || 'Something went wrong';
    const safeMessage = String(rawMessage)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');

    return `<div role="alert" class="alert alert-error">
        <svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24">
            <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 14l2-2m0 0l2-2m-2 2l-2-2m2 2l2 2m7-2a9 9 0 11-18 0 9 9 0 0118 0z" />
        </svg>
        <span>Error: ${safeMessage}</span>
        <div>
            <button class="btn btn-sm btn-ghost" onclick="window.location.reload()">Retry</button>
        </div>
    </div>`;
}
|
45 |
+
|
46 |
+
// Toast notification function
|
47 |
+
/**
 * Show a transient toast notification in the bottom-right corner.
 *
 * The toast can be dismissed with its close button and otherwise fades out
 * and removes itself after 3 seconds.
 *
 * @param {string} message - Text to display. It is inserted as plain text
 *     (not HTML), so untrusted content cannot inject markup.
 * @param {string} [type='info'] - One of 'info', 'success', 'warning' or
 *     'error'; selects the alert colour class and icon. Unknown values get
 *     the info icon.
 */
function showToast(message, type = 'info') {
    const toast = document.createElement('div');
    toast.className = `alert alert-${type} fixed bottom-4 right-4 max-w-xs z-50 shadow-lg`;

    // Different icon based on type
    let icon = '';
    switch (type) {
        case 'success':
            icon = `<svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24">
                <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 12l2 2 4-4m6 2a9 9 0 11-18 0 9 9 0 0118 0z" />
            </svg>`;
            break;
        case 'warning':
            icon = `<svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24">
                <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 9v2m0 4h.01m-6.938 4h13.856c1.54 0 2.502-1.667 1.732-3L13.732 4c-.77-1.333-2.694-1.333-3.464 0L3.34 16c-.77 1.333.192 3 1.732 3z" />
            </svg>`;
            break;
        case 'error':
            icon = `<svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24">
                <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 14l2-2m0 0l2-2m-2 2l-2-2m2 2l2 2m7-2a9 9 0 11-18 0 9 9 0 0118 0z" />
            </svg>`;
            break;
        default: // info
            icon = `<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" class="stroke-current shrink-0 w-6 h-6">
                <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"></path>
            </svg>`;
    }

    // Only trusted, static markup goes through innerHTML; the message span is
    // left empty here and filled in as text below.
    toast.innerHTML = `
        ${icon}
        <span data-toast-message></span>
        <div>
            <button class="btn btn-sm btn-ghost" onclick="this.parentElement.parentElement.remove()">
                <svg xmlns="http://www.w3.org/2000/svg" class="h-4 w-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
                    <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M6 18L18 6M6 6l12 12" />
                </svg>
            </button>
        </div>
    `;
    // NOTE(review): callers that relied on passing HTML in `message` will now
    // see it rendered literally — confirm none do.
    toast.querySelector('[data-toast-message]').textContent = message;
    document.body.appendChild(toast);

    // Auto-dismiss after 3 seconds: fade out for 500 ms, then remove the node.
    setTimeout(() => {
        toast.classList.add('opacity-0', 'transition-opacity', 'duration-500');
        setTimeout(() => toast.remove(), 500);
    }, 3000);
}
|
94 |
+
|
95 |
+
// Extract video ID from YouTube URL
|
96 |
+
/**
 * Extract the 11-character video ID from a YouTube URL.
 *
 * Recognised forms include `youtu.be/<id>`, `/v/<id>`, `/embed/<id>`,
 * `watch?v=<id>` (with `v` anywhere in the query string), `/shorts/<id>` and
 * `/live/<id>`.
 *
 * @param {string} url - The URL (or URL-like text) to inspect.
 * @returns {?string} The video ID, or null if `url` is not a string or no
 *     11-character ID could be found.
 */
function extractVideoId(url) {
    if (typeof url !== 'string') {
        return null;
    }
    const text = url.trim();

    // Original matcher, kept first so previously accepted URLs resolve the
    // same way. The dot in "youtu.be" is now escaped.
    const regExp = /^.*((youtu\.be\/)|(v\/)|(\/u\/\w\/)|(embed\/)|(watch\?))\??v?=?([^#&?]*).*/;
    const match = text.match(regExp);
    if (match && match[7].length === 11) {
        return match[7];
    }

    // `v` is not the first query parameter, e.g. watch?feature=share&v=<id>.
    const byParam = text.match(/[?&]v=([^#&?]{11})(?=[#&?]|$)/);
    if (byParam) {
        return byParam[1];
    }

    // Path-style URLs the original pattern did not know about.
    const byPath = text.match(/\/(?:shorts|live)\/([^#&?/]{11})(?=[#&?/]|$)/);
    return byPath ? byPath[1] : null;
}
|
101 |
+
|
102 |
+
// Load recent videos into the footer from the API
|
103 |
+
/**
 * Fetch the three most recent videos from `/api/video/recent` and render them
 * as links inside the `#footer-recent-videos` element.
 *
 * Does nothing if that element is not on the page. While the request is in
 * flight a loading message is shown; an empty result or a failed request is
 * reported with a short message in the same element.
 *
 * Video titles and IDs come from the API, so links are built with DOM APIs
 * (`textContent`, `encodeURIComponent`) rather than interpolated into HTML.
 */
function loadFooterRecentVideos() {
    const footerRecentVideos = document.getElementById('footer-recent-videos');
    if (!footerRecentVideos) return;

    // Show loading state
    footerRecentVideos.innerHTML = '<p class="text-sm opacity-70">Loading recent videos...</p>';

    // Fetch recent videos from server API
    fetch('/api/video/recent?limit=3')
        .then(response => {
            if (!response.ok) {
                throw new Error('Failed to fetch recent videos');
            }
            return response.json();
        })
        .then(videos => {
            if (!Array.isArray(videos) || videos.length === 0) {
                footerRecentVideos.innerHTML = '<p class="text-sm opacity-70">No recent videos</p>';
                return;
            }

            // Build the links off-DOM, then swap them in with one update.
            const links = document.createDocumentFragment();
            videos.forEach(video => {
                const link = document.createElement('a');
                link.href = `/video/${encodeURIComponent(video.video_id)}`;
                link.className = 'link link-hover block py-1 truncate';

                const marker = document.createElement('span');
                marker.className = 'text-xs text-primary';
                marker.textContent = '▶';
                link.appendChild(marker);

                // Fall back to a generic label when the video has no title.
                const label = video.title || `Video ${video.video_id}`;
                link.appendChild(document.createTextNode(` ${label}`));

                links.appendChild(link);
            });

            footerRecentVideos.innerHTML = '';
            footerRecentVideos.appendChild(links);
        })
        .catch(error => {
            console.error('Error loading footer videos:', error);
            footerRecentVideos.innerHTML = '<p class="text-sm opacity-70">Failed to load recent videos</p>';
        });
}
|
app/static/js/video.js
ADDED
@@ -0,0 +1,440 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
// Video page functionality
|
2 |
+
/*
 * Video page controller.
 *
 * Wires the embedded YouTube player to the transcript panel: loads transcript
 * segments from the API, lets the user click a segment to seek the player,
 * runs searches against the current video, and polls for segments while a
 * freshly submitted video is still being processed.
 *
 * Reads a `videoId` variable that is not defined in this file — presumably set
 * by an inline script in the page template; verify against video.html.
 *
 * All API- or user-supplied text (transcript text, search query, error
 * messages) is HTML-escaped before being written through innerHTML.
 */
document.addEventListener('DOMContentLoaded', () => {
    // How often to poll for segments while the video is still being processed.
    const PROCESSING_POLL_INTERVAL_MS = 2000;
    // Stop polling and ask the user to refresh after this long.
    const PROCESSING_TIMEOUT_MS = 120000;

    const ERROR_ICON = '<svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10 14l2-2m0 0l2-2m-2 2l-2-2m2 2l2 2m7-2a9 9 0 11-18 0 9 9 0 0118 0z" /></svg>';
    const INFO_ICON = '<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" class="stroke-current shrink-0 w-6 h-6"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"></path></svg>';
    const HTML_ESCAPES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };

    const playerElement = document.getElementById('youtube-player');
    const searchInput = document.getElementById('search-input');
    const searchButton = document.getElementById('search-button');
    const transcriptContainer = document.getElementById('transcript-container');
    const loadingIndicator = document.getElementById('loading');

    // Without the transcript panel and search controls there is nothing to wire up.
    if (!transcriptContainer || !searchInput || !searchButton) {
        console.error('Video page elements are missing; transcript UI not initialised');
        return;
    }

    let transcriptSegments = [];
    let ytPlayer = null;
    let isProcessingUrl = false;

    // Query-string options: `q` pre-fills and runs a search once the
    // transcript is available; `processing=true` means the video was just
    // submitted and its segments may not exist yet.
    const urlParams = new URLSearchParams(window.location.search);
    const processingUrl = urlParams.get('processing');
    let pendingSearchQuery = urlParams.get('q');

    // Escape text for safe interpolation into HTML content or attribute values.
    function escapeHtml(value) {
        return String(value).replace(/[&<>"']/g, ch => HTML_ESCAPES[ch]);
    }

    // True when the page supplied a usable video ID (not empty or a stringified null/undefined).
    function hasValidVideoId() {
        return Boolean(videoId) && videoId !== 'undefined' && videoId !== 'null';
    }

    // URL of the segments endpoint for the current video.
    function segmentsUrl() {
        return `/api/video/segments/${encodeURIComponent(videoId)}`;
    }

    // Markup shown wherever an API call is refused because of a bad video ID.
    function invalidVideoIdMarkup() {
        return `
            <div class="alert alert-error">
                <div>
                    ${ERROR_ICON}
                    <span>Invalid video ID. Please return to the home page and select a valid video.</span>
                </div>
            </div>
        `;
    }

    // Show or hide the search loading indicator (if the page has one).
    function setLoading(isLoading) {
        if (loadingIndicator) {
            loadingIndicator.classList.toggle('hidden', !isLoading);
        }
    }

    // Format seconds as M:SS, or H:MM:SS once the value reaches an hour.
    // Negative or non-numeric input is treated as 0.
    function formatTime(seconds) {
        const total = Math.max(0, Number(seconds) || 0);
        const hours = Math.floor(total / 3600);
        const mins = Math.floor((total % 3600) / 60);
        const secs = Math.floor(total % 60);

        if (hours > 0) {
            return `${hours}:${mins.toString().padStart(2, '0')}:${secs.toString().padStart(2, '0')}`;
        }
        return `${mins}:${secs.toString().padStart(2, '0')}`;
    }

    // Log an error and return escaped alert markup describing it.
    function handleError(error) {
        console.error(error);
        const message = (error && error.message) || 'Something went wrong';
        return `<div class="alert alert-error">Error: ${escapeHtml(message)}</div>`;
    }

    // Attach the YouTube IFrame API to the existing player iframe, loading the
    // API script first if it is not on the page yet.
    function initYouTubePlayer() {
        if (!playerElement) {
            console.warn('YouTube player iframe not found; seeking is disabled');
            return;
        }
        const iframeId = playerElement.getAttribute('id');

        if (window.YT && typeof window.YT.Player === 'function') {
            createYouTubePlayer(iframeId);
            return;
        }

        // Chain rather than overwrite any ready-callback another script registered.
        const previousReady = window.onYouTubeIframeAPIReady;
        window.onYouTubeIframeAPIReady = function() {
            if (typeof previousReady === 'function') {
                previousReady();
            }
            createYouTubePlayer(iframeId);
        };

        // Inject the API script only if nothing has started loading it.
        if (!window.YT) {
            const tag = document.createElement('script');
            tag.src = 'https://www.youtube.com/iframe_api';
            const firstScriptTag = document.getElementsByTagName('script')[0];
            firstScriptTag.parentNode.insertBefore(tag, firstScriptTag);
        }
    }

    // Create the YT.Player wrapper around the iframe with the given ID.
    function createYouTubePlayer(iframeId) {
        ytPlayer = new YT.Player(iframeId, {
            events: {
                'onReady': onPlayerReady
            }
        });
    }

    // Called by the IFrame API once the player accepts commands.
    function onPlayerReady() {
        console.log('Player ready');
    }

    // Fetch all transcript segments for the current video and render them,
    // then run the search requested via `?q=` (once) if there is one.
    function loadTranscript() {
        transcriptContainer.innerHTML = '<div class="flex justify-center my-4"><span class="loading loading-spinner loading-md"></span><span class="ml-2">Loading transcript...</span></div>';

        // Check if video ID is valid before making API call
        if (!hasValidVideoId()) {
            transcriptContainer.innerHTML = invalidVideoIdMarkup();
            return;
        }

        fetch(segmentsUrl())
            .then(response => {
                if (!response.ok) {
                    throw new Error('Failed to load transcript: ' + response.status);
                }
                return response.json();
            })
            .then(segments => {
                transcriptSegments = Array.isArray(segments) ? segments : [];

                if (transcriptSegments.length === 0) {
                    transcriptContainer.innerHTML = `
                        <div class="alert alert-info">
                            <div>
                                ${INFO_ICON}
                                <span>No transcript available for this video. Try processing the video first from the home page.</span>
                            </div>
                        </div>
                    `;
                    return;
                }

                displayTranscript(transcriptSegments);
                applyPendingSearch();
            })
            .catch(error => {
                console.error('Error loading transcript:', error);
                const message = (error && error.message) || 'Something went wrong';
                transcriptContainer.innerHTML = `
                    <div class="alert alert-error">
                        <div>
                            ${ERROR_ICON}
                            <span>Error loading transcript: ${escapeHtml(message)}</span>
                        </div>
                    </div>
                    <p class="mt-4">This may happen if:</p>
                    <ul class="list-disc ml-8 mt-2">
                        <li>The video hasn't been processed yet</li>
                        <li>The video ID is incorrect</li>
                        <li>The server is experiencing issues</li>
                    </ul>
                    <p class="mt-4">Try processing this video from the home page first.</p>
                `;
            });
    }

    // Run the search passed in the page URL, at most once, now that the
    // transcript is loaded.
    function applyPendingSearch() {
        if (!pendingSearchQuery) {
            return;
        }
        searchInput.value = pendingSearchQuery;
        pendingSearchQuery = null;
        performSearch();
    }

    // Render the given segments as the full transcript.
    // Clicks are handled by the delegated listener on the container.
    function displayTranscript(segments) {
        transcriptContainer.innerHTML = segments.map((segment, index) => `
            <div class="transcript-segment" data-start="${escapeHtml(segment.start)}" data-end="${escapeHtml(segment.end)}" data-index="${index}">
                <span class="timestamp">${formatTime(segment.start)}</span>
                <span class="segment-text">${escapeHtml(segment.text)}</span>
            </div>
        `).join('');
    }

    // Seek the player to `seconds`, try to start playback, and highlight the
    // matching transcript segment.
    function seekToTime(seconds) {
        console.log('Seeking to time:', seconds);

        if (!ytPlayer || typeof ytPlayer.seekTo !== 'function') {
            console.error('YouTube player is not ready yet or seekTo method is not available');
            return;
        }

        try {
            // Ensure seconds is a number
            seconds = parseFloat(seconds);

            ytPlayer.seekTo(seconds, true);

            // Try to play the video (may be blocked by browser autoplay policies)
            try {
                ytPlayer.playVideo();
            } catch (e) {
                console.warn('Could not autoplay video:', e);
            }

            highlightSegment(seconds);
        } catch (error) {
            console.error('Error seeking to time:', error);
        }
    }

    // Highlight (and scroll to) the segment containing `time`.
    function highlightSegment(time) {
        const segments = Array.from(transcriptContainer.querySelectorAll('.transcript-segment'));
        segments.forEach(segment => segment.classList.remove('highlight'));

        // Prefer the segment whose [start, end] range contains the time.
        let currentSegment = segments.find(segment => {
            const start = parseFloat(segment.dataset.start);
            const end = parseFloat(segment.dataset.end);
            return time >= start && time <= end;
        });

        // Segments without a usable end time (NaN above) can still be matched
        // by their exact start time.
        if (!currentSegment) {
            currentSegment = segments.find(segment => parseFloat(segment.dataset.start) === time);
        }

        if (currentSegment) {
            currentSegment.classList.add('highlight');
            currentSegment.scrollIntoView({ behavior: 'smooth', block: 'center' });
        }
    }

    // Search the current video for the text in the search box and show the
    // matching segments in place of the full transcript.
    function performSearch() {
        const query = searchInput.value.trim();
        if (!query) {
            transcriptContainer.innerHTML = '<div class="alert alert-warning">Please enter a search query</div>';
            return;
        }

        // Validate video ID before searching
        if (!hasValidVideoId()) {
            transcriptContainer.innerHTML = invalidVideoIdMarkup();
            return;
        }

        setLoading(true);

        fetch(`/api/video/search?query=${encodeURIComponent(query)}&video_id=${encodeURIComponent(videoId)}`)
            .then(response => {
                if (!response.ok) {
                    throw new Error('Search failed');
                }
                return response.json();
            })
            .then(results => {
                setLoading(false);

                if (!Array.isArray(results)) {
                    throw new Error('Unexpected search response');
                }

                // The query is user input (possibly from the page URL), so it
                // must be escaped before going into markup.
                const safeQuery = escapeHtml(query);

                if (results.length === 0) {
                    transcriptContainer.innerHTML = `
                        <div role="alert" class="alert alert-info">
                            ${INFO_ICON}
                            <span>No results found for "${safeQuery}". <a href="#" id="reset-search" class="link link-primary">Show all transcript</a></span>
                        </div>`;
                    return;
                }

                // Display search results as filtered transcript
                filterTranscript(results);

                // Header with the result count and a link back to the full transcript.
                const searchInfoHeader = document.createElement('div');
                searchInfoHeader.className = 'mb-4 flex justify-between items-center';
                searchInfoHeader.innerHTML = `
                    <div class="badge badge-accent">${results.length} results for "${safeQuery}"</div>
                    <a href="#" id="reset-search" class="link link-primary text-sm">Show all transcript</a>
                `;
                transcriptContainer.insertBefore(searchInfoHeader, transcriptContainer.firstChild);
            })
            .catch(error => {
                setLoading(false);
                transcriptContainer.innerHTML = handleError(error);
            });
    }

    // Replace the transcript with only the segments returned by a search,
    // each annotated with its match score.
    function filterTranscript(results) {
        transcriptContainer.innerHTML = results.map(result => {
            const segment = result.segment;
            const score = (result.score * 100).toFixed(0);
            // Position of this segment in the full transcript (-1 if not found).
            const index = transcriptSegments.findIndex(s => s.segment_id === segment.segment_id);

            return `
                <div class="transcript-segment search-result" data-start="${escapeHtml(segment.start)}" data-end="${escapeHtml(segment.end)}" data-index="${index}">
                    <div class="flex justify-between items-center">
                        <span class="timestamp">${formatTime(segment.start)}</span>
                        <div class="badge badge-primary">${escapeHtml(score)}% match</div>
                    </div>
                    <span class="segment-text mt-1">${escapeHtml(segment.text)}</span>
                </div>
            `;
        }).join('');
    }

    // Clear the search box; the caller re-renders the full transcript.
    function resetTranscriptFilter() {
        searchInput.value = '';
    }

    // If the page was opened with `processing=true`, show a progress message
    // and poll the segments endpoint until segments appear or the timeout
    // elapses. Returns true when polling was started, false otherwise.
    function showProcessingIndicator() {
        if (processingUrl !== 'true') {
            return false;
        }

        isProcessingUrl = true;
        transcriptContainer.innerHTML = `
            <div class="flex items-center justify-center my-4">
                <span class="loading loading-spinner loading-md text-primary"></span>
                <span class="ml-2">Processing video from URL... This may take a few moments</span>
            </div>
        `;

        let pollTimer = null;
        let giveUpTimer = null;
        // Stops both timers and marks processing as finished so a late poll
        // response or the timeout cannot overwrite whatever is shown next.
        const stopPolling = () => {
            clearInterval(pollTimer);
            clearTimeout(giveUpTimer);
            isProcessingUrl = false;
        };

        pollTimer = setInterval(() => {
            // Validate video ID before making API call
            if (!hasValidVideoId()) {
                stopPolling();
                transcriptContainer.innerHTML = invalidVideoIdMarkup();
                return;
            }

            fetch(segmentsUrl())
                // A non-OK response just means "not ready yet"; keep polling.
                .then(response => (response.ok ? response.json() : null))
                .then(segments => {
                    if (isProcessingUrl && segments && segments.length > 0) {
                        stopPolling();
                        loadTranscript();
                    }
                })
                .catch(error => {
                    console.error('Error checking segments:', error);
                });
        }, PROCESSING_POLL_INTERVAL_MS);

        giveUpTimer = setTimeout(() => {
            if (!isProcessingUrl) {
                return;
            }
            stopPolling();
            transcriptContainer.innerHTML = `
                <div class="alert alert-warning">
                    <div>
                        ${INFO_ICON}
                        <span>Processing is taking longer than expected. Refresh the page to check progress.</span>
                    </div>
                </div>
            `;
        }, PROCESSING_TIMEOUT_MS);

        return true;
    }

    // One delegated listener handles clicks on transcript segments and on the
    // "Show all transcript" link, whichever markup is currently rendered.
    transcriptContainer.addEventListener('click', event => {
        if (event.target.closest('#reset-search')) {
            event.preventDefault();
            resetTranscriptFilter();
            displayTranscript(transcriptSegments);
            return;
        }

        const segment = event.target.closest('.transcript-segment');
        if (segment) {
            seekToTime(parseFloat(segment.dataset.start));
        }
    });

    // Search functionality
    searchButton.addEventListener('click', performSearch);
    searchInput.addEventListener('keydown', e => {
        if (e.key === 'Enter') performSearch();
    });

    // Initialize
    initYouTubePlayer();

    // Show processing indicator or load transcript. A `?q=` search, if any,
    // runs from loadTranscript() once segments are available.
    if (!showProcessingIndicator()) {
        loadTranscript();
    }
});
|
app/templates/base.html
ADDED
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!DOCTYPE html>
|
2 |
+
<html lang="en" data-theme="light">
|
3 |
+
<head>
|
4 |
+
<meta charset="UTF-8">
|
5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
6 |
+
<title>{{ title }}</title>
|
7 |
+
<link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/full.css" rel="stylesheet" type="text/css" />
|
8 |
+
<script src="https://cdn.tailwindcss.com"></script>
|
9 |
+
<link rel="stylesheet" href="{{ url_for('static', path='/css/style.css') }}">
|
10 |
+
</head>
|
11 |
+
<body class="min-h-screen flex flex-col">
|
12 |
+
<!-- Header/Navbar -->
|
13 |
+
<div class="navbar bg-base-200 shadow-md">
|
14 |
+
<div class="navbar-start">
|
15 |
+
<a href="/" class="btn btn-ghost text-xl">
|
16 |
+
<svg xmlns="http://www.w3.org/2000/svg" class="h-6 w-6 mr-2" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
17 |
+
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M15 10l4.553-2.276A1 1 0 0121 8.618v6.764a1 1 0 01-1.447.894L15 14M5 18h8a2 2 0 002-2V8a2 2 0 00-2-2H5a2 2 0 00-2 2v8a2 2 0 002 2z" />
|
18 |
+
</svg>
|
19 |
+
In-Video Search
|
20 |
+
</a>
|
21 |
+
</div>
|
22 |
+
<div class="navbar-center">
|
23 |
+
<div class="form-control">
|
24 |
+
<div class="join">
|
25 |
+
<input type="text" id="global-search" placeholder="Search videos..." class="input input-bordered join-item w-full md:w-96" />
|
26 |
+
<button id="global-search-button" class="btn btn-primary join-item">
|
27 |
+
<svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
28 |
+
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M21 21l-6-6m2-5a7 7 0 11-14 0 7 7 0 0114 0z" />
|
29 |
+
</svg>
|
30 |
+
</button>
|
31 |
+
</div>
|
32 |
+
</div>
|
33 |
+
</div>
|
34 |
+
<div class="navbar-end">
|
35 |
+
<div class="dropdown dropdown-end">
|
36 |
+
<div tabindex="0" role="button" class="btn btn-ghost btn-circle">
|
37 |
+
<svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
38 |
+
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 3v1m0 16v1m9-9h-1M4 12H3m15.364 6.364l-.707-.707M6.343 6.343l-.707-.707m12.728 0l-.707.707M6.343 17.657l-.707.707M16 12a4 4 0 11-8 0 4 4 0 018 0z" />
|
39 |
+
</svg>
|
40 |
+
</div>
|
41 |
+
<ul tabindex="0" class="dropdown-content z-[1] menu p-2 shadow bg-base-100 rounded-box w-52">
|
42 |
+
<li><button class="theme-item" data-theme="light">Light</button></li>
|
43 |
+
<li><button class="theme-item" data-theme="dark">Dark</button></li>
|
44 |
+
<li><button class="theme-item" data-theme="cupcake">Cupcake</button></li>
|
45 |
+
<li><button class="theme-item" data-theme="synthwave">Synthwave</button></li>
|
46 |
+
</ul>
|
47 |
+
</div>
|
48 |
+
</div>
|
49 |
+
</div>
|
50 |
+
|
51 |
+
<!-- Main Content -->
|
52 |
+
<main class="container mx-auto px-4 py-8 flex-grow">
|
53 |
+
{% block content %}{% endblock %}
|
54 |
+
</main>
|
55 |
+
|
56 |
+
<!-- Footer -->
|
57 |
+
<footer class="footer p-10 bg-base-200 text-base-content">
|
58 |
+
<div>
|
59 |
+
<span class="footer-title">In-Video Search</span>
|
60 |
+
<p>Powered by Qdrant & FastAPI</p>
|
61 |
+
<p>Search through video content semantically</p>
|
62 |
+
</div>
|
63 |
+
<div>
|
64 |
+
<span class="footer-title">Recent Videos</span>
|
65 |
+
<div id="footer-recent-videos">
|
66 |
+
<!-- Recent videos will be loaded here by JavaScript -->
|
67 |
+
<p class="text-sm opacity-70">No recent videos</p>
|
68 |
+
</div>
|
69 |
+
</div>
|
70 |
+
<div>
|
71 |
+
<span class="footer-title">Resources</span>
|
72 |
+
<a class="link link-hover" href="https://qdrant.tech/" target="_blank">Qdrant</a>
|
73 |
+
<a class="link link-hover" href="https://fastapi.tiangolo.com/" target="_blank">FastAPI</a>
|
74 |
+
<a class="link link-hover" href="https://daisyui.com/" target="_blank">DaisyUI</a>
|
75 |
+
</div>
|
76 |
+
</footer>
|
77 |
+
|
78 |
+
<!-- Scripts -->
|
79 |
+
<script src="{{ url_for('static', path='/js/main.js') }}"></script>
|
80 |
+
{% block scripts %}{% endblock %}
|
81 |
+
</body>
|
82 |
+
</html>
|
app/templates/index.html
ADDED
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{% extends "base.html" %}
|
2 |
+
|
3 |
+
{% block content %}
|
4 |
+
<div class="max-w-4xl mx-auto">
|
5 |
+
<div class="card bg-base-100 shadow-xl">
|
6 |
+
<div class="card-body">
|
7 |
+
<h2 class="card-title">Process YouTube Video</h2>
|
8 |
+
<p class="text-gray-600 mb-4">Enter a YouTube URL to process its transcript for searching</p>
|
9 |
+
|
10 |
+
<div class="form-control">
|
11 |
+
<label class="label">
|
12 |
+
<span class="label-text">Enter YouTube URL</span>
|
13 |
+
</label>
|
14 |
+
<div class="join w-full">
|
15 |
+
<input type="text" id="youtube-url" placeholder="https://www.youtube.com/watch?v=..." class="input input-bordered join-item w-full" />
|
16 |
+
<button id="process-button" class="btn btn-primary join-item">Process</button>
|
17 |
+
</div>
|
18 |
+
</div>
|
19 |
+
|
20 |
+
<div class="mt-4" id="process-status">
|
21 |
+
<!-- Processing status messages will appear here -->
|
22 |
+
</div>
|
23 |
+
|
24 |
+
<div class="divider">OR</div>
|
25 |
+
|
26 |
+
<h3 class="font-bold mb-2">Example Videos</h3>
|
27 |
+
<div class="grid grid-cols-1 md:grid-cols-3 gap-2">
|
28 |
+
<button class="btn btn-outline btn-accent btn-sm example-video w-full h-auto" data-url="https://www.youtube.com/watch?v=zjkBMFhNj_g">
|
29 |
+
<div class="flex items-center w-full">
|
30 |
+
<svg xmlns="http://www.w3.org/2000/svg" class="h-4 w-4 mr-2 flex-shrink-0" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
31 |
+
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M14.752 11.168l-3.197-2.132A1 1 0 0010 9.87v4.263a1 1 0 001.555.832l3.197-2.132a1 1 0 000-1.664z" />
|
32 |
+
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
|
33 |
+
</svg>
|
34 |
+
<span class="truncate text-left">Intro to Large Language Models</span>
|
35 |
+
</div>
|
36 |
+
</button>
|
37 |
+
<button class="btn btn-outline btn-accent btn-sm example-video w-full h-auto" data-url="https://www.youtube.com/watch?v=7xTGNNLPyMI">
|
38 |
+
<div class="flex items-center w-full">
|
39 |
+
<svg xmlns="http://www.w3.org/2000/svg" class="h-4 w-4 mr-2 flex-shrink-0" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
40 |
+
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M14.752 11.168l-3.197-2.132A1 1 0 0010 9.87v4.263a1 1 0 001.555.832l3.197-2.132a1 1 0 000-1.664z" />
|
41 |
+
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
|
42 |
+
</svg>
|
43 |
+
<span class="truncate text-left">Deep Dive into LLMs like ChatGPT</span>
|
44 |
+
</div>
|
45 |
+
</button>
|
46 |
+
<button class="btn btn-outline btn-accent btn-sm example-video w-full h-auto" data-url="https://www.youtube.com/watch?v=EWvNQjAaOHw">
|
47 |
+
<div class="flex items-center w-full">
|
48 |
+
<svg xmlns="http://www.w3.org/2000/svg" class="h-4 w-4 mr-2 flex-shrink-0" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
49 |
+
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M14.752 11.168l-3.197-2.132A1 1 0 0010 9.87v4.263a1 1 0 001.555.832l3.197-2.132a1 1 0 000-1.664z" />
|
50 |
+
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
|
51 |
+
</svg>
|
52 |
+
<span class="truncate text-left">How I use LLMs</span>
|
53 |
+
</div>
|
54 |
+
</button>
|
55 |
+
</div>
|
56 |
+
</div>
|
57 |
+
</div>
|
58 |
+
|
59 |
+
<div class="card bg-base-100 shadow-xl mt-6 hidden" id="recently-processed">
|
60 |
+
<div class="card-body">
|
61 |
+
<h2 class="card-title">Recently Processed Videos</h2>
|
62 |
+
<div class="mt-4">
|
63 |
+
<!-- Video carousel with navigation arrows -->
|
64 |
+
<div class="flex items-center gap-2">
|
65 |
+
<!-- Left arrow navigation -->
|
66 |
+
<div class="hidden md:block" id="carousel-prev">
|
67 |
+
<button class="btn btn-circle btn-primary btn-disabled">
|
68 |
+
<svg xmlns="http://www.w3.org/2000/svg" class="h-6 w-6" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
69 |
+
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M15 19l-7-7 7-7" />
|
70 |
+
</svg>
|
71 |
+
</button>
|
72 |
+
</div>
|
73 |
+
|
74 |
+
<!-- Carousel content -->
|
75 |
+
<div class="carousel carousel-center rounded-box w-full p-2 overflow-x-auto">
|
76 |
+
<div id="video-list" class="flex space-x-4 items-stretch">
|
77 |
+
<!-- Video cards will be populated here as carousel items -->
|
78 |
+
</div>
|
79 |
+
</div>
|
80 |
+
|
81 |
+
<!-- Right arrow navigation -->
|
82 |
+
<div class="hidden md:block" id="carousel-next">
|
83 |
+
<button class="btn btn-circle btn-primary">
|
84 |
+
<svg xmlns="http://www.w3.org/2000/svg" class="h-6 w-6" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
85 |
+
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 5l7 7-7 7" />
|
86 |
+
</svg>
|
87 |
+
</button>
|
88 |
+
</div>
|
89 |
+
</div>
|
90 |
+
</div>
|
91 |
+
</div>
|
92 |
+
</div>
|
93 |
+
</div>
|
94 |
+
{% endblock %}
|
95 |
+
|
96 |
+
{% block scripts %}
|
97 |
+
<script src="{{ url_for('static', path='/js/index.js') }}"></script>
|
98 |
+
{% endblock %}
|
app/templates/video.html
ADDED
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{% extends "base.html" %}
|
2 |
+
|
3 |
+
{% block content %}
|
4 |
+
<div class="grid grid-cols-1 lg:grid-cols-2 gap-6">
|
5 |
+
<div class="lg:col-span-1">
|
6 |
+
<div class="card bg-base-100 shadow-xl">
|
7 |
+
<div class="card-body p-4">
|
8 |
+
<div class="aspect-video">
|
9 |
+
<iframe id="youtube-player" class="w-full h-full"
|
10 |
+
src="https://www.youtube.com/embed/{{ video_id }}?enablejsapi=1"
|
11 |
+
frameborder="0"
|
12 |
+
allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture"
|
13 |
+
allowfullscreen>
|
14 |
+
</iframe>
|
15 |
+
</div>
|
16 |
+
</div>
|
17 |
+
</div>
|
18 |
+
</div>
|
19 |
+
|
20 |
+
<div class="lg:col-span-1">
|
21 |
+
<div class="card bg-base-100 shadow-xl sticky top-4">
|
22 |
+
<div class="card-body">
|
23 |
+
<div class="flex justify-between items-center">
|
24 |
+
<h2 class="card-title">Video Transcript</h2>
|
25 |
+
</div>
|
26 |
+
|
27 |
+
<div class="form-control mb-4">
|
28 |
+
<label class="label">
|
29 |
+
<span class="label-text">Search in transcript</span>
|
30 |
+
</label>
|
31 |
+
<div class="join w-full">
|
32 |
+
<input type="text" id="search-input" placeholder="Search in this video..." class="input input-bordered join-item w-full" />
|
33 |
+
<button id="search-button" class="btn btn-primary join-item">
|
34 |
+
<svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
35 |
+
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M21 21l-6-6m2-5a7 7 0 11-14 0 7 7 0 0114 0z" />
|
36 |
+
</svg>
|
37 |
+
Search
|
38 |
+
</button>
|
39 |
+
</div>
|
40 |
+
</div>
|
41 |
+
|
42 |
+
<div id="loading" class="hidden mt-2 mb-2">
|
43 |
+
<span class="loading loading-spinner loading-md"></span>
|
44 |
+
<span class="ml-2">Searching...</span>
|
45 |
+
</div>
|
46 |
+
|
47 |
+
<div id="transcript-container" class="mt-2 transcript-container">
|
48 |
+
<!-- Transcript will be loaded here -->
|
49 |
+
</div>
|
50 |
+
</div>
|
51 |
+
</div>
|
52 |
+
</div>
|
53 |
+
</div>
|
54 |
+
{% endblock %}
|
55 |
+
|
56 |
+
{% block scripts %}
|
57 |
+
<script>
|
58 |
+
// Store the video ID in a JavaScript variable
|
59 |
+
const videoId = "{{ video_id }}";
|
60 |
+
</script>
|
61 |
+
<script src="{{ url_for('static', path='/js/video.js') }}"></script>
|
62 |
+
{% endblock %}
|
docker-compose.yml
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
version: '3.8'
|
2 |
+
|
3 |
+
services:
|
4 |
+
app:
|
5 |
+
build: .
|
6 |
+
ports:
|
7 |
+
- "8000:8000"
|
8 |
+
environment:
|
9 |
+
- QDRANT_URL=http://qdrant:6333
|
10 |
+
- WORKERS=4 # Set number of workers
|
11 |
+
# - QDRANT_API_KEY=your_api_key_here  # uncomment and set if needed
|
12 |
+
depends_on:
|
13 |
+
- qdrant
|
14 |
+
restart: unless-stopped
|
15 |
+
healthcheck:
|
16 |
+
test: ["CMD", "curl", "-f", "http://localhost:8000/"]
|
17 |
+
interval: 30s
|
18 |
+
timeout: 10s
|
19 |
+
retries: 3
|
20 |
+
start_period: 40s
|
21 |
+
|
22 |
+
qdrant:
|
23 |
+
image: qdrant/qdrant:v1.13.5
|
24 |
+
volumes:
|
25 |
+
- qdrant_data:/qdrant/storage
|
26 |
+
|
27 |
+
volumes:
|
28 |
+
qdrant_data:
|
example.env
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
# Qdrant Configuration
|
2 |
+
QDRANT_URL=http://localhost:6333
|
3 |
+
QDRANT_API_KEY=
|
gunicorn.conf.py
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import multiprocessing
|
3 |
+
|
4 |
+
# Take the worker count from the WORKERS environment variable, or derive it from the CPU core count
|
5 |
+
workers_env = os.getenv("WORKERS")
|
6 |
+
if workers_env:
|
7 |
+
workers = int(workers_env)
|
8 |
+
else:
|
9 |
+
# Use the recommended formula: (2 * CPU cores) + 1
|
10 |
+
workers = (2 * multiprocessing.cpu_count()) + 1
|
11 |
+
|
12 |
+
# Use Uvicorn worker class for ASGI support
|
13 |
+
worker_class = "uvicorn.workers.UvicornWorker"
|
14 |
+
|
15 |
+
# Bind to 0.0.0.0:8000
|
16 |
+
bind = "0.0.0.0:8000"
|
17 |
+
|
18 |
+
# Logging
|
19 |
+
accesslog = "-" # Log to stdout
|
20 |
+
errorlog = "-" # Log to stderr
|
21 |
+
loglevel = "info"
|
22 |
+
|
23 |
+
# Timeout configuration
|
24 |
+
timeout = 120 # 2 minutes
|
25 |
+
graceful_timeout = 30
|
26 |
+
|
27 |
+
# Worker settings
|
28 |
+
worker_connections = 1000 # Maximum number of connections each worker can handle
|
29 |
+
keepalive = 5 # Seconds to wait between client requests before closing connection
|
30 |
+
|
31 |
+
# Process title shown in ps/top (gunicorn applies it only when setproctitle is installed)
|
32 |
+
proc_name = "vibe-coding-rag"
|
poetry.lock
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
pyproject.toml
CHANGED
@@ -7,16 +7,27 @@ readme = "README.md"
|
|
7 |
package-mode = false
|
8 |
|
9 |
[tool.poetry.dependencies]
|
10 |
-
python = "^3.10"
|
11 |
torch = {version = "^2.6.0+cpu", source = "pytorch-cpu"}
|
12 |
sentence-transformers = "^3.4.1"
|
13 |
qdrant-client = "^1.13.3"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
[[tool.poetry.source]]
|
16 |
name = "pytorch-cpu"
|
17 |
url = "https://download.pytorch.org/whl/cpu"
|
18 |
priority = "explicit"
|
19 |
|
|
|
|
|
|
|
|
|
20 |
[build-system]
|
21 |
requires = ["poetry-core"]
|
22 |
build-backend = "poetry.core.masonry.api"
|
|
|
7 |
package-mode = false
|
8 |
|
9 |
[tool.poetry.dependencies]
|
10 |
+
python = "^3.10,<3.14"
|
11 |
torch = {version = "^2.6.0+cpu", source = "pytorch-cpu"}
|
12 |
sentence-transformers = "^3.4.1"
|
13 |
qdrant-client = "^1.13.3"
|
14 |
+
fastapi = "^0.115.11"
|
15 |
+
uvicorn = "^0.34.0"
|
16 |
+
gunicorn = "^21.2.0"
|
17 |
+
jinja2 = "^3.1.6"
|
18 |
+
youtube-transcript-api = "^1.0.2"
|
19 |
+
pytube = "^15.0.0"
|
20 |
+
yt-dlp = "^2025.2.19"
|
21 |
|
22 |
[[tool.poetry.source]]
|
23 |
name = "pytorch-cpu"
|
24 |
url = "https://download.pytorch.org/whl/cpu"
|
25 |
priority = "explicit"
|
26 |
|
27 |
+
|
28 |
+
[tool.poetry.group.dev.dependencies]
|
29 |
+
ruff = "^0.11.0"
|
30 |
+
|
31 |
[build-system]
|
32 |
requires = ["poetry-core"]
|
33 |
build-backend = "poetry.core.masonry.api"
|