# smart-edit-assistant / pipelines/ai_inference.py
# (Originally hosted on Hugging Face by user "mgbam"; commit cc3065b.)
import openai
import whisper
import json
# Cache of loaded Whisper models, keyed by model size. Loading a Whisper
# checkpoint is expensive (hundreds of MB read from disk), so we keep each
# loaded model around instead of reloading it on every call.
_WHISPER_MODELS = {}


def transcribe_audio(audio_file, model_size="base"):
    """
    Transcribe the given audio file using a local Whisper model.

    :param audio_file: Path to the audio file (e.g., WAV) to transcribe.
    :param model_size: Whisper model variant to use ('base', 'small',
        'medium', 'large', ...). Defaults to 'base', matching the
        original behavior.
    :return: The transcribed text as a string.
    """
    model = _WHISPER_MODELS.get(model_size)
    if model is None:
        # First use of this model size: load once and cache for reuse.
        model = whisper.load_model(model_size)
        _WHISPER_MODELS[model_size] = model
    result = model.transcribe(audio_file)
    # transcribe() returns a dict; only the plain text is needed here.
    return result["text"]
def generate_edit_instructions(transcript_text, model="gpt-4",
                               temperature=0.7, max_tokens=500):
    """
    Send the transcript to an OpenAI chat model for video editing
    instructions in JSON format.

    :param transcript_text: The raw transcript text from Whisper.
    :param model: Chat model to use. Defaults to 'gpt-4', matching the
        original behavior.
    :param temperature: Sampling temperature for the completion.
    :param max_tokens: Maximum number of tokens in the response.
    :return: A string containing the model's response, typically expected
        to be valid JSON or structured text (the model is prompted for
        JSON but the output is not validated here).
    """
    system_msg = (
        "You are a video editing assistant. Your task is to parse the following transcript and "
        "provide a list of editing instructions in JSON format. "
        "Instructions may include timecodes for removing filler words, suggestions for B-roll, "
        "and recommended transitions."
    )
    user_msg = f"Transcript:\n{transcript_text}\n\nOutput instructions in JSON..."
    # NOTE(review): openai.ChatCompletion.create is the legacy pre-1.0 SDK
    # interface; it was removed in openai>=1.0 (which uses
    # OpenAI().chat.completions.create). This code requires openai<1.0.
    response = openai.ChatCompletion.create(
        model=model,
        messages=[
            {"role": "system", "content": system_msg},
            {"role": "user", "content": user_msg}
        ],
        temperature=temperature,
        max_tokens=max_tokens
    )
    # Extract the assistant's reply from the first (and only) choice.
    return response.choices[0].message["content"]