import openai
import whisper
import json


def transcribe_audio(audio_file):
    """
    Transcribes the given audio file using the local Whisper model.

    :param audio_file: Path to the audio file (e.g., WAV) to transcribe.
    :return: The transcribed text as a string.
    """
    # Load the 'base' Whisper model (you can also use 'small', 'medium', 'large', etc.)
    model = whisper.load_model("base")
    # Perform transcription
    result = model.transcribe(audio_file)
    # Return only the text portion
    return result["text"]


def generate_edit_instructions(transcript_text):
    """
    Sends the transcript to GPT-4 for video editing instructions in JSON format.

    :param transcript_text: The raw transcript text from Whisper.
    :return: A string containing GPT-4's response, typically expected to be valid JSON or structured text.
    """
    system_msg = (
        "You are a video editing assistant. Your task is to parse the following transcript and "
        "provide a list of editing instructions in JSON format. "
        "Instructions may include timecodes for removing filler words, suggestions for B-roll, "
        "and recommended transitions."
    )
    user_msg = f"Transcript:\n{transcript_text}\n\nOutput instructions in JSON..."
    # Call the OpenAI API for GPT-based analysis.
    # Note: this uses the legacy ChatCompletion interface (openai<1.0) and expects
    # the API key to be available, e.g. via the OPENAI_API_KEY environment variable.
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": system_msg},
            {"role": "user", "content": user_msg}
        ],
        temperature=0.7,
        max_tokens=500
    )
    # Extract the AI's output from the first choice
    return response.choices[0].message["content"]
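

if __name__ == "__main__":
    # Minimal usage sketch showing how the two functions chain together and
    # why json is imported above. This block is an illustrative assumption,
    # not part of the original pipeline: "input.wav" is a hypothetical path,
    # and the model's reply is only *expected* to be JSON, so we fall back
    # to the raw string if parsing fails.
    transcript = transcribe_audio("input.wav")
    instructions_raw = generate_edit_instructions(transcript)
    try:
        # Parse the reply so downstream tooling can consume structured edits
        instructions = json.loads(instructions_raw)
        print(json.dumps(instructions, indent=2))
    except json.JSONDecodeError:
        # The model may wrap the JSON in prose; keep the raw text in that case
        print(instructions_raw)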