import functools
import json

import openai
import whisper


@functools.lru_cache(maxsize=None)
def _load_whisper_model(model_name):
    """Load and cache a Whisper model so repeated calls don't reload it.

    Loading a model reads large weight files from disk; caching keeps one
    instance per model size alive for the lifetime of the process.
    """
    return whisper.load_model(model_name)


def transcribe_audio(audio_file, model_name="base"):
    """Transcribe the given audio file using a local Whisper model.

    :param audio_file: Path to the audio file (e.g., WAV) to transcribe.
    :param model_name: Whisper model size to load ("base", "small",
        "medium", "large", ...). Defaults to "base", matching the
        original hard-coded behavior.
    :return: The transcribed text as a string.
    """
    model = _load_whisper_model(model_name)
    result = model.transcribe(audio_file)
    # `transcribe` returns a dict of results; only the text portion
    # is needed by callers.
    return result["text"]


def generate_edit_instructions(transcript_text, model="gpt-4"):
    """Send the transcript to a chat model for video-editing instructions.

    :param transcript_text: The raw transcript text from Whisper.
    :param model: Chat model name to use. Defaults to "gpt-4", matching
        the original hard-coded behavior.
    :return: The model's response content as a string, typically expected
        to be valid JSON or structured text (not validated here).
    """
    system_msg = (
        "You are a video editing assistant. Your task is to parse the following transcript and "
        "provide a list of editing instructions in JSON format. "
        "Instructions may include timecodes for removing filler words, suggestions for B-roll, "
        "and recommended transitions."
    )
    user_msg = f"Transcript:\n{transcript_text}\n\nOutput instructions in JSON..."

    # NOTE(review): openai.ChatCompletion is the legacy (<1.0) SDK surface
    # and was removed in openai>=1.0 — confirm the pinned openai version,
    # or migrate to openai.OpenAI().chat.completions.create.
    response = openai.ChatCompletion.create(
        model=model,
        messages=[
            {"role": "system", "content": system_msg},
            {"role": "user", "content": user_msg},
        ],
        temperature=0.7,
        max_tokens=500,
    )

    # Extract the assistant's output from the first choice.
    return response.choices[0].message["content"]