# smart-edit-assistant / pipelines/ai_inference.py
# (Originally hosted on Hugging Face by user "mgbam"; commit cc3065b.)
import openai
import whisper
import json
# Cache of loaded Whisper models, keyed by model size. Loading a Whisper
# checkpoint is expensive (hundreds of MB read from disk), so we keep each
# loaded model around instead of reloading it on every call.
_WHISPER_MODELS = {}


def transcribe_audio(audio_file, model_size="base"):
    """
    Transcribe the given audio file using a local Whisper model.

    :param audio_file: Path to the audio file (e.g., WAV) to transcribe.
    :param model_size: Whisper model variant to use ('base', 'small',
        'medium', 'large', ...). Defaults to 'base', matching the
        original behavior.
    :return: The transcribed text as a string.
    """
    model = _WHISPER_MODELS.get(model_size)
    if model is None:
        # First use of this model size: load once and cache for reuse.
        model = whisper.load_model(model_size)
        _WHISPER_MODELS[model_size] = model
    result = model.transcribe(audio_file)
    # transcribe() returns a dict; only the plain text is needed here.
    return result["text"]
def generate_edit_instructions(transcript_text, model="gpt-4",
                               temperature=0.7, max_tokens=500):
    """
    Send the transcript to an OpenAI chat model for video editing
    instructions in JSON format.

    :param transcript_text: The raw transcript text from Whisper.
    :param model: Chat model to use. Defaults to 'gpt-4', matching the
        original behavior.
    :param temperature: Sampling temperature for the completion.
    :param max_tokens: Maximum number of tokens in the response.
    :return: A string containing the model's response, typically expected
        to be valid JSON or structured text (the model is prompted for
        JSON but the output is not validated here).
    """
    system_msg = (
        "You are a video editing assistant. Your task is to parse the following transcript and "
        "provide a list of editing instructions in JSON format. "
        "Instructions may include timecodes for removing filler words, suggestions for B-roll, "
        "and recommended transitions."
    )
    user_msg = f"Transcript:\n{transcript_text}\n\nOutput instructions in JSON..."
    # NOTE(review): openai.ChatCompletion.create is the legacy pre-1.0 SDK
    # interface; it was removed in openai>=1.0 (which uses
    # OpenAI().chat.completions.create). This code requires openai<1.0.
    response = openai.ChatCompletion.create(
        model=model,
        messages=[
            {"role": "system", "content": system_msg},
            {"role": "user", "content": user_msg}
        ],
        temperature=temperature,
        max_tokens=max_tokens
    )
    # Extract the assistant's reply from the first (and only) choice.
    return response.choices[0].message["content"]