from src.segmenter import detect_event_segments
from src.transcriber import transcribe_video
from src.event_card import parse_game_card
from src.labeler import TogetherLLMLabeler
from src.embedder import InternVLEmbedder
from src.pinecone_store import PineconeStore
from src.utils import (
    extract_key_frames,
    save_frames_locally,
    generate_frame_urls,
    match_transcript_to_events,
    clip_video_segment
)

labeler = TogetherLLMLabeler()
embedder = InternVLEmbedder()
pinecone = PineconeStore()


def run_pipeline(video_path, game_card_str):
    # Parse the structured game card and transcribe the full video once.
    game_card = parse_game_card(game_card_str)
    transcript = transcribe_video(video_path)

    # Detect candidate event segments, then align transcript snippets to each.
    events = detect_event_segments(video_path)
    matched_events = match_transcript_to_events(events, transcript)

    results = []
    for idx, event in enumerate(matched_events):
        event_id = f"event-{idx}"

        # Sample key frames from the event window, persist them, and expose
        # them via URLs so the labeler can reference them.
        frames = extract_key_frames(video_path, event['start_sec'], event['end_sec'])
        frame_paths = save_frames_locally(frames, event_id)
        frame_urls = generate_frame_urls(frame_paths)

        label = labeler.generate_label(
            game_card=game_card,
            transcript=event['transcript'],
            spatial_context=frames,  # the key frames extracted above
            frame_urls=frame_urls
        )

        # Clip the event segment, then embed both the clip and its text label.
        clip_path = clip_video_segment(video_path, event['start_sec'], event['end_sec'], event_id)
        video_vector = embedder.embed_video(clip_path)
        text_vector = embedder.embed_text(label)

        metadata = {
            "start_sec": event['start_sec'],
            "end_sec": event['end_sec'],
            "label": label
        }

        # Store the video and text vectors under distinct IDs that share the
        # same event metadata, so either modality can be retrieved later.
        pinecone.upsert(f"{event_id}-video", video_vector, metadata)
        pinecone.upsert(f"{event_id}-text", text_vector, metadata)
        results.append(metadata)

    return {"events": results}


def search_highlights(query, top_k=5):
    # Embed the free-form query and search only the text-labeled vectors.
    query_vector = embedder.embed_text(query)
    results = pinecone.query(query_vector, filter_key="text", top_k=top_k)
    return [
        f"{r['label']} ({r['start_sec']}s - {r['end_sec']}s)"
        for r in results
    ]
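
# Example invocation, as a minimal sketch: the video path and game card
# string below are hypothetical placeholders, and the game card format is
# assumed, not defined by this module. Only run_pipeline and
# search_highlights from above are used.
if __name__ == "__main__":
    game_card = "Team A vs Team B, 2024-05-01, final score 3-2"  # hypothetical format
    summary = run_pipeline("match.mp4", game_card)
    print(f"Indexed {len(summary['events'])} events")

    # Query the indexed highlights with natural language.
    for hit in search_highlights("goal in the second half", top_k=3):
        print(hit)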