// VideoChain-API/src/llm/enrichVideoSpecsUsingLLM.mts
import { ChatCompletionRequestMessage } from "openai"
import { Video, VideoAPIRequest } from "../types.mts"
import { generateYAML } from "./openai/generateYAML.mts"
import { HallucinatedVideoRequest, OpenAIErrorResponse } from "./types.mts"
import { getQueryChatMessages } from "../preproduction/prompts.mts"
import { getValidNumber } from "../utils/getValidNumber.mts"
import { parseShotRequest } from "../utils/parseShotRequest.mts"
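
/**
 * Ask the LLM to "hallucinate" richer specs (audio, actor, noise, shots)
 * for a draft video, then merge them into the given `Video`.
 * Note: the input object is mutated in place and also returned.
 */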
export const enrichVideoSpecsUsingLLM = async (video: Video): Promise<Video> => {
  const messages: ChatCompletionRequestMessage[] = getQueryChatMessages(video.videoPrompt)

  const defaultValue = {} as unknown as HallucinatedVideoRequest
  let hallucinatedVideo: HallucinatedVideoRequest

  // reset the shots: they will be rebuilt below from the LLM response
  video.shots = []
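
  // ask the LLM to expand the prompt into structured YAML
  // (defaultValue is presumably the fallback shape if the model returns nothing usable)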
  try {
    hallucinatedVideo = await generateYAML<HallucinatedVideoRequest>(
      messages,
      defaultValue
    )
    console.log("enrichVideoSpecsUsingLLM: hallucinatedVideo = ", hallucinatedVideo)
  } catch (err) {
    // surface the structured OpenAI error when there is one,
    // otherwise fall back to a plain string message
    let error: OpenAIErrorResponse = err?.response?.data?.error as unknown as OpenAIErrorResponse
    if (!error) {
      error = { message: `${err || ""}` } as unknown as OpenAIErrorResponse
    }
    console.error(JSON.stringify(error, null, 2))
    throw new Error(`failed to call the LLM: ${error.message}`)
  }
  // const video = JSON.parse(JSON.stringify(referenceVideo)) as Video

  // TODO here we should make some verifications and perhaps even some conversions
  // between the LLM response and the actual format used in a videoRequest
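
  // merge the hallucinated fields into the draft, keeping the draft's own
  // values whenever the LLM did not return one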
  video.backgroundAudioPrompt = hallucinatedVideo.backgroundAudioPrompt || video.backgroundAudioPrompt
  video.foregroundAudioPrompt = hallucinatedVideo.foregroundAudioPrompt || video.foregroundAudioPrompt
  video.actorPrompt = hallucinatedVideo.actorPrompt || video.actorPrompt
  video.actorVoicePrompt = hallucinatedVideo.actorVoicePrompt || video.actorVoicePrompt

  // the LLM may return the boolean as a string, so stringify and compare
  video.noise = typeof hallucinatedVideo.noise !== "undefined"
    ? (`${hallucinatedVideo.noise || ""}`.toLowerCase() === "true")
    : video.noise

  video.noiseAmount = typeof hallucinatedVideo.noiseAmount !== "undefined"
    ? getValidNumber(hallucinatedVideo.noiseAmount, 0, 10, 2)
    : video.noiseAmount

  video.outroDurationMs = typeof hallucinatedVideo.outroDurationMs !== "undefined"
    ? getValidNumber(hallucinatedVideo.outroDurationMs, 0, 3000, 500)
    : video.outroDurationMs
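
  // convert each hallucinated shot into a real shot request and attach it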
  const hallucinatedShots = Array.isArray(hallucinatedVideo.shots) ? hallucinatedVideo.shots : []

  for (const hallucinatedShot of hallucinatedShots) {
    const shot = await parseShotRequest(video, {
      shotPrompt: hallucinatedShot.shotPrompt,
      environmentPrompt: hallucinatedShot.environmentPrompt,
      photographyPrompt: hallucinatedShot.photographyPrompt,
      actionPrompt: hallucinatedShot.actionPrompt,
      foregroundAudioPrompt: hallucinatedShot.foregroundAudioPrompt
    })
    video.shots.push(shot)
  }
console.log("enrichVideoSpecsUsingLLM: video = ", video)
return video
}
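
// Example usage (a minimal sketch: `Video` is defined in ../types.mts and has
// more fields than shown here, so the draft below is cast rather than fully built):
//
//   const draft = {
//     videoPrompt: "a cinematic drone shot over a foggy forest",
//     shots: [],
//   } as unknown as Video
//
//   const enriched = await enrichVideoSpecsUsingLLM(draft)
//   console.log(enriched.shots.length, enriched.backgroundAudioPrompt)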