jbilcke-hf HF staff committed on
Commit
5513dc6
·
1 Parent(s): 90266e1

try to make the code more resilient to HF Inference API crashes

Browse files
Files changed (1) hide show
  1. src/app/api/v1/create/index.ts +34 -5
src/app/api/v1/create/index.ts CHANGED
@@ -4,6 +4,7 @@ import { ClapProject, getValidNumber, newClap, newSegment } from "@aitube/clap"
4
 
5
  import { predict } from "@/app/api/providers/huggingface/predictWithHuggingFace"
6
  import { parseRawStringToYAML } from "@/app/api/parsers/parseRawStringToYAML"
 
7
 
8
  import { systemPrompt } from "./systemPrompt"
9
  import { LatentStory } from "./types"
@@ -31,22 +32,50 @@ export async function create(request: {
31
 
32
  const userPrompt = `Video story to generate: ${prompt}`
33
 
 
 
 
34
  // TODO use streaming for the Hugging Face prediction
35
  //
36
  // note that a Clap file is actually a YAML stream of documents
37
  // so technically we could stream everything from end-to-end
38
  // (but I haven't coded the helpers to do this yet)
39
- const rawString = await predict({
40
  systemPrompt,
41
  userPrompt,
42
- nbMaxNewTokens: 1400,
43
- prefix: "```yaml\n",
44
  })
45
 
46
  console.log("api/v1/create(): rawString: ", rawString)
47
 
48
- const shots = parseRawStringToYAML<LatentStory[]>(rawString, [])
49
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  console.log(`api/v1/create(): generated ${shots.length} shots`)
51
 
52
  // this is approximate - TTS generation will determine the final duration of each shot
 
4
 
5
  import { predict } from "@/app/api/providers/huggingface/predictWithHuggingFace"
6
  import { parseRawStringToYAML } from "@/app/api/parsers/parseRawStringToYAML"
7
+ import { sleep } from "@/lib/utils/sleep"
8
 
9
  import { systemPrompt } from "./systemPrompt"
10
  import { LatentStory } from "./types"
 
32
 
33
  const userPrompt = `Video story to generate: ${prompt}`
34
 
35
+ const prefix = "```yaml\n"
36
+ const nbMaxNewTokens = 1400
37
+
38
  // TODO use streaming for the Hugging Face prediction
39
  //
40
  // note that a Clap file is actually a YAML stream of documents
41
  // so technically we could stream everything from end-to-end
42
  // (but I haven't coded the helpers to do this yet)
43
+ let rawString = await predict({
44
  systemPrompt,
45
  userPrompt,
46
+ nbMaxNewTokens,
47
+ prefix,
48
  })
49
 
50
  console.log("api/v1/create(): rawString: ", rawString)
51
 
52
+ let shots: LatentStory[] = []
53
+
54
+ let maybeShots = parseRawStringToYAML<LatentStory[]>(rawString, [])
55
+
56
+ if (!Array.isArray(maybeShots) || maybeShots.length === 0) {
57
+ console.log(`api/v1/create(): failed to generate shots.. trying again`)
58
+
59
+ await sleep(2000)
60
+
61
+ rawString = await predict({
62
+ systemPrompt,
63
+ userPrompt: userPrompt + ".", // we trick the Hugging Face cache
64
+ nbMaxNewTokens,
65
+ prefix,
66
+ })
67
+
68
+ console.log("api/v1/create(): rawString: ", rawString)
69
+
70
+ maybeShots = parseRawStringToYAML<LatentStory[]>(rawString, [])
71
+ if (!Array.isArray(maybeShots) || maybeShots.length === 0) {
72
+ console.log(`api/v1/create(): failed to generate shots for the second time, which indicates an issue with the Hugging Face API`)
73
+ } else {
74
+ shots = maybeShots
75
+ }
76
+ } else {
77
+ shots = maybeShots
78
+ }
79
  console.log(`api/v1/create(): generated ${shots.length} shots`)
80
 
81
  // this is approximate - TTS generation will determine the final duration of each shot