# coding=utf-8
# judge voice-over
import logging
import re

from third_party.VideoLLaMA2.videollama2 import model_init, mm_infer


class Step02:
    """Run VideoLLaMA2 prompts over a video (optionally with its audio).

    ``run_step0`` sends a fixed "generate audio step-by-step" prompt for a
    video; ``run_step2`` asks the model whether the audio contains voice-over
    and turns the A/B answer into a boolean.
    """

    # Text that precedes the final answer in chain-of-thought ("cot") output.
    # NOTE(review): the original code split on an empty string, which always
    # raises ValueError; the intended marker appears to have been lost. Set
    # this to the real marker if known. When empty, the whole model output is
    # searched for the answer letter instead.
    COT_ANSWER_MARKER = ""

    # A standalone option letter "A" or "B" (not part of a longer word).
    _CHOICE_PATTERN = re.compile(r"\b([AB])\b")

    # Angle-bracket tags such as "</B>"; removed before matching so that a
    # letter inside a tag is never mistaken for the answer.
    _TAG_PATTERN = re.compile(r"<[^<>]*>")

    def __init__(self, model_path, step2_mode):
        """Load the model and pick the preprocessing function for video input.

        Args:
            model_path: Passed straight to ``model_init``.
            step2_mode: Answer-parsing mode for ``run_step2``. ``"cot"``
                extracts the option letter from a longer response; any other
                value compares the raw model output against ``"A"``.
        """
        self.modal = "video"
        self.log = logging.getLogger(self.__class__.__name__)
        self.log.setLevel(logging.INFO)

        self.model, self.processor, self.tokenizer = model_init(model_path)
        # The processor is indexed by modality name; only "video" is used here.
        self.preprocess = self.processor[self.modal]
        self.step2_mode = step2_mode

    def _extract_cot_choice(self, text):
        """Return the last standalone "A"/"B" in *text*, or "" if none.

        If ``COT_ANSWER_MARKER`` is set, only the part after its last
        occurrence is searched. Angle-bracket tags are ignored.
        """
        marker = self.COT_ANSWER_MARKER
        # Guard against an empty marker: str.split("") raises ValueError.
        tail = text.split(marker)[-1] if marker else text
        # Blank out tags so "</B>" cannot be read as the answer "B".
        tail = self._TAG_PATTERN.sub(" ", tail)
        candidates = self._CHOICE_PATTERN.findall(tail)
        return candidates[-1] if candidates else ""

    def run_step0(self, video_path, modal_type='v'):
        """Prompt the model to generate audio from a video step-by-step.

        Args:
            video_path: Path handed to the video preprocessor.
            modal_type: Currently ignored; kept so existing callers that pass
                it keep working.

        Returns:
            Whatever ``mm_infer`` returns for the prompt.
        """
        question = "Generate high-quality audio from video step-by-step."

        self.log.info("######################################################################################################")
        self.log.info("Generate high-quality audio from video step-by-step...")

        # va=False here vs. va=True in run_step2 — presumably toggles whether
        # the audio track is loaded with the frames; confirm in the processor.
        audio_video_tensor = self.preprocess(video_path, va=False)
        output = mm_infer(
            audio_video_tensor,
            question,
            model=self.model,
            tokenizer=self.tokenizer,
            modal=self.modal,
            do_sample=False,
        )
        return output

    def run_step2(self, video_audio_path, modal_type='av'):
        """Ask the model whether the audio of a video contains voice-over.

        Args:
            video_audio_path: Path handed to the video preprocessor.
            modal_type: Currently ignored; kept so existing callers that pass
                it keep working.

        Returns:
            bool: True when the model's answer is option "A" (voice-over
            present), False otherwise — including when no answer letter can
            be found in "cot" mode.
        """
        question = "Given a video and its corresponding audio, determine whether the audio contains voice-over? Options: A. Yes, B. No. Choose A or B."

        audio_video_tensor = self.preprocess(video_audio_path, va=True)
        output = mm_infer(
            audio_video_tensor,
            question,
            model=self.model,
            tokenizer=self.tokenizer,
            modal=self.modal,
            do_sample=False,
        )

        if self.step2_mode == "cot":
            # The response is longer than a bare letter in this mode, so the
            # option letter has to be pulled out of it first.
            output = self._extract_cot_choice(output)
            self.log.debug("Step2 parsed choice: %r", output)

        has_voice_over = (output == "A")

        if has_voice_over:
            self.log.info(f"The video generated by Step1 ({video_audio_path}) contains voice-over.")
        else:
            self.log.info(f"The video generated by Step1 ({video_audio_path}) does not contain voice-over.")
        self.log.info("Finish Step2 successfully.\n")
        return has_voice_over