Spaces:

mgbam
/

smart-edit-assistant

Sleeping

File size: 1,686 Bytes

ad94a24
 
 
 
 
cc3065b
 
 
 
 
 
 
ad94a24
cc3065b
ad94a24
cc3065b
ad94a24
 
 
cc3065b
 
 
 
 
 
 
 
 
 
 
 
 
ad94a24
 
cc3065b
ad94a24
 
 
 
 
 
cc3065b
 
ad94a24
cc3065b
 
ad94a24

import openai
import whisper
import json

# Whisper models that have already been loaded, keyed by model name, so that
# repeated transcriptions reuse them instead of reloading on every call.
_WHISPER_MODELS = {}


def _get_whisper_model(model_name):
    """Return the Whisper model called ``model_name``, loading it on first use."""
    if model_name not in _WHISPER_MODELS:
        _WHISPER_MODELS[model_name] = whisper.load_model(model_name)
    return _WHISPER_MODELS[model_name]


def transcribe_audio(audio_file, *, model_name="base"):
    """
    Transcribes the given audio file using a local Whisper model.

    The model is loaded the first time a given ``model_name`` is requested and
    is then kept in memory, so later calls skip the load step.

    :param audio_file: Path to the audio file (e.g., WAV) to transcribe.
    :param model_name: Name of the Whisper model to use. Defaults to 'base'
        (you can also use 'small', 'medium', 'large', etc.).
    :return: The transcribed text as a string.
    """
    model = _get_whisper_model(model_name)
    # Perform transcription
    result = model.transcribe(audio_file)
    # Return only the text portion of the transcription result
    return result["text"]

def generate_edit_instructions(transcript_text):
    """
    Sends the transcript to GPT-4 for video editing instructions in JSON format.

    Supports both generations of the ``openai`` SDK: releases from 1.0 onward
    removed ``openai.ChatCompletion``, so on those the request goes through
    the module-level ``openai.chat.completions`` client instead. Credentials
    are expected to be configured by the caller (nothing is set here).

    :param transcript_text: The raw transcript text from Whisper.
    :return: A string containing GPT-4's response, typically expected to be valid JSON or structured text.
    """
    system_msg = (
        "You are a video editing assistant. Your task is to parse the following transcript and "
        "provide a list of editing instructions in JSON format. "
        "Instructions may include timecodes for removing filler words, suggestions for B-roll, "
        "and recommended transitions."
    )

    user_msg = f"Transcript:\n{transcript_text}\n\nOutput instructions in JSON..."

    # The request itself is the same for either SDK generation.
    request = {
        "model": "gpt-4",
        "messages": [
            {"role": "system", "content": system_msg},
            {"role": "user", "content": user_msg},
        ],
        "temperature": 0.7,
        "max_tokens": 500,
    }

    # Call the OpenAI API for GPT-based analysis
    if hasattr(openai, "OpenAI"):
        # openai>=1.0: the legacy ChatCompletion entry point raises here.
        response = openai.chat.completions.create(**request)
    else:
        # openai<1.0: legacy entry point.
        response = openai.ChatCompletion.create(**request)

    # Extract the AI's output from the first choice. Attribute access is used
    # because the 1.x message object is not subscriptable.
    return response.choices[0].message.content