# blip/src/pipeline.py
# Commit 58ac08a by amezi: "Adding skeleton highlights creator" (2.08 kB)
from src.segmenter import detect_event_segments
from src.transcriber import transcribe_video
from src.event_card import parse_game_card
from src.labeler import TogetherLLMLabeler
from src.embedder import InternVLEmbedder
from src.pinecone_store import PineconeStore
from src.utils import (
extract_key_frames, save_frames_locally,
generate_frame_urls, match_transcript_to_events,
clip_video_segment
)
# Module-level service objects, constructed once at import time and shared by
# run_pipeline() and search_highlights() below.
# NOTE(review): each constructor is called with no arguments; any credentials
# or model configuration are presumably resolved inside the classes — confirm.
labeler = TogetherLLMLabeler()  # produces the text label for each event
embedder = InternVLEmbedder()  # embeds both video clips and text
# NOTE(review): this name would shadow the `pinecone` package if that package
# were ever imported directly in this module.
pinecone = PineconeStore()  # vector store used for upsert and query
def run_pipeline(video_path, game_card_str):
    """Label, embed and index every event detected in a video.

    The game card is parsed, the video is transcribed and segmented, and the
    transcript is matched to the detected segments. For each matched event a
    label is generated, a clip is cut, and two vectors (one for the clip, one
    for the label text) are upserted into the store under the ids
    ``event-<n>-video`` and ``event-<n>-text``.

    Args:
        video_path: Location of the video, passed unchanged to the
            transcriber, segmenter, frame extractor and clipper.
        game_card_str: Raw game card text handed to ``parse_game_card``.

    Returns:
        ``{"events": [...]}`` where each entry is the metadata dict stored
        with the vectors: ``start_sec``, ``end_sec`` and ``label``.
    """
    card = parse_game_card(game_card_str)
    spoken_text = transcribe_video(video_path)
    segments = detect_event_segments(video_path)

    indexed = []
    for position, segment in enumerate(
        match_transcript_to_events(segments, spoken_text)
    ):
        segment_id = f"event-{position}"
        begin, finish = segment['start_sec'], segment['end_sec']

        # Key frames go through save_frames_locally and generate_frame_urls;
        # only the resulting URLs are handed to the labeler.
        key_frames = extract_key_frames(video_path, begin, finish)
        urls = generate_frame_urls(save_frames_locally(key_frames, segment_id))

        # NOTE(review): spatial_context is read from the event's own 'frames'
        # entry, not from the key frames extracted above — confirm that the
        # matched events actually carry a 'frames' key and that this is the
        # intended input.
        caption = labeler.generate_label(
            game_card=card,
            transcript=segment['transcript'],
            spatial_context=segment['frames'],
            frame_urls=urls,
        )

        clip_file = clip_video_segment(video_path, begin, finish, segment_id)
        clip_embedding = embedder.embed_video(clip_file)
        caption_embedding = embedder.embed_text(caption)

        # The same dict object is attached to both vectors and returned.
        info = {
            "start_sec": begin,
            "end_sec": finish,
            "label": caption,
        }
        pinecone.upsert(f"{segment_id}-video", clip_embedding, info)
        pinecone.upsert(f"{segment_id}-text", caption_embedding, info)
        indexed.append(info)

    return {"events": indexed}
def search_highlights(query, top_k=5):
    """Return formatted descriptions of the events best matching a text query.

    The query is embedded as text and looked up in the store with
    ``filter_key="text"``.

    Args:
        query: Free-text search string.
        top_k: Maximum number of matches requested from the store.

    Returns:
        A list of strings of the form ``"<label> (<start>s - <end>s)"``, one
        per result, in the order the store returned them.
    """
    matches = pinecone.query(
        embedder.embed_text(query), filter_key="text", top_k=top_k
    )

    # Each match must expose 'label', 'start_sec' and 'end_sec'; presumably
    # this is the metadata written by run_pipeline — verify against
    # PineconeStore.query.
    summaries = []
    for match in matches:
        summaries.append(
            f"{match['label']} ({match['start_sec']}s - {match['end_sec']}s)"
        )
    return summaries