import openai
import whisper
import json
def transcribe_audio(audio_file, model_name="base"):
    """
    Transcribe the given audio file using a local Whisper model.

    :param audio_file: Path to the audio file (e.g., WAV) to transcribe.
    :param model_name: Whisper model size to load ("base", "small", "medium",
        "large", ...). Defaults to "base", preserving the original behavior.
    :return: The transcribed text as a string.
    """
    # Loading the model is the expensive step; callers transcribing many
    # files may want to load once and reuse rather than call this per file.
    model = whisper.load_model(model_name)
    # Perform transcription. The result dict also carries segments and the
    # detected language; only the plain text is returned here.
    result = model.transcribe(audio_file)
    return result["text"]
def generate_edit_instructions(transcript_text):
    """
    Ask GPT-4 for video-editing instructions derived from a transcript.

    :param transcript_text: The raw transcript text from Whisper.
    :return: GPT-4's reply as a string, typically structured/JSON-like text.
    """
    # NOTE(review): this uses the legacy openai.ChatCompletion interface
    # (openai SDK < 1.0) — confirm the pinned SDK version before upgrading.
    system_msg = (
        "You are a video editing assistant. Your task is to parse the following transcript and "
        "provide a list of editing instructions in JSON format. "
        "Instructions may include timecodes for removing filler words, suggestions for B-roll, "
        "and recommended transitions."
    )
    user_msg = f"Transcript:\n{transcript_text}\n\nOutput instructions in JSON..."

    chat_messages = [
        {"role": "system", "content": system_msg},
        {"role": "user", "content": user_msg},
    ]
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=chat_messages,
        temperature=0.7,
        max_tokens=500,
    )
    # The reply text lives on the first (and only requested) choice.
    return response.choices[0].message["content"]