jbilcke-hf committed · Commit 67f97d0 · Parent: 81d1fba

fix the mp3 generator

src/app/api/generators/speech/generateVoiceWithParlerTTS.ts CHANGED
@@ -5,6 +5,12 @@ const gradioSpaceApiUrl = `https://jbilcke-hf-ai-tube-model-parler-tts-mini.hf.s
 const huggingFaceSpace = "jbilcke-hf/ai-tube-model-parler-tts-mini"
 const apiKey = `${process.env.MICROSERVICE_API_SECRET_TOKEN || ""}`
 
+/**
+ * Note: this generates a MP3 file
+ *
+ * @param param0
+ * @returns
+ */
 export async function generateSpeechWithParlerTTS({
   text,
   audioId,
@@ -17,7 +23,6 @@ export async function generateSpeechWithParlerTTS({
   neverThrow?: boolean
 }): Promise<string> {
 
-
   const actualFunction = async () => {
 
     const res = await fetch(gradioSpaceApiUrl + (gradioSpaceApiUrl.endsWith("/") ? "" : "/") + "api/predict", {
@@ -58,7 +63,7 @@ export async function generateSpeechWithParlerTTS({
       throw new Error(`the returned audio was empty`)
     }
 
-    return addBase64Header(data[0] as string, "wav")
+    return addBase64Header(data[0] as string, "mp3")
   }
 
   try {
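
The fix itself is the last hunk: the Space now returns MP3 audio, so the base64 payload must be labeled "mp3" instead of "wav". The addBase64Header helper is not part of this commit; as a rough idea of what that second argument controls, here is a minimal hypothetical sketch of such a helper, assuming it only wraps a raw base64 payload into a data: URI with the matching MIME type:

    // Hypothetical sketch only: the real helper lives in @/lib/data/addBase64Header
    // and may differ. It illustrates why the format string has to match the bytes
    // actually produced by the Space ("mp3" here, no longer "wav").
    export function addBase64Header(base64: string = "", format: string = "mp3"): string {
      // already a full data URI? leave it untouched
      if (base64.startsWith("data:")) {
        return base64
      }
      // otherwise prefix the payload, e.g. "mp3" -> "data:audio/mp3;base64,...."
      return `data:audio/${format};base64,${base64}`
    }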
src/app/api/generators/speech/generateVoiceWithXTTS2.txt CHANGED
@@ -1,35 +1,37 @@
+import { addBase64Header } from "@/lib/data/addBase64Header"
+import { tryApiCalls } from "../../utils/tryApiCall"
 
-import { StoryLine } from "../../types/structures.mts"
-import { tryApiCalls } from "../../utils/tryApiCalls.mts"
-import { promptToGenerateAudioStory } from "../prompts/prompts.mts"
-import { microserviceApiKey } from "../../config.mts"
-import { addBase64Header } from "../../base64/addBase64.mts"
+export type StoryLine = {
+  text: string
+  audio: string // in base64
+}
 
 // TODO delete this? we don't need an env var for this I think?
-const aiStoryServerApiUrl = `https://jbilcke-hf-ai-story-server.hf.space`
+const gradioSpaceApiUrl = `https://jbilcke-hf-ai-story-server.hf.space`
 const huggingFaceSpace = "jbilcke-hf/ai-story-server"
+const apiKey = `${process.env.MICROSERVICE_API_SECRET_TOKEN || ""}`
 
-export async function generateAudioStory({
-  prompt,
-  voice,
-  // maxLines,
-  neverThrow,
-  debug,
+export async function generateSpeechWithParlerTTS({
+  text,
+  audioId,
+  debug = false,
+  neverThrow = false,
 }: {
-  prompt: string
-  voice?: string
-  // maxLines: number
-  neverThrow?: boolean
+  text: string
+  audioId: string
   debug?: boolean
-}): Promise<StoryLine[]> {
+  neverThrow?: boolean
+}): Promise<string> {
   const actualFunction = async () => {
 
-    const cropped = prompt.slice(0, 30)
+    const prompt = text.slice(0, 30)
     // console.log(`user requested "${cropped}${cropped !== prompt ? "..." : ""}"`)
 
     // positivePrompt = filterOutBadWords(positivePrompt)
 
-    const res = await fetch(aiStoryServerApiUrl + (aiStoryServerApiUrl.endsWith("/") ? "" : "/") + "api/predict", {
+    const promptToGenerateAudioStory = ``
+
+    const res = await fetch(gradioSpaceApiUrl + (gradioSpaceApiUrl.endsWith("/") ? "" : "/") + "api/predict", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
@@ -38,12 +40,13 @@ export async function generateAudioStory({
      body: JSON.stringify({
        fn_index: 0, // <- important!
        data: [
-          microserviceApiKey,
+          apiKey,
          promptToGenerateAudioStory,
          prompt,
 
          // TODO: add support for custom wav
-          voice === "Julian" ? "Julian" : "Cloée",
+          // voice === "Julian" ? "Julian" : "Cloée",
+          "Julian",
 
          // maxLines,
        ],
@@ -65,7 +68,7 @@ export async function generateAudioStory({
 
    return stories.map(line => ({
      text: line.text.replaceAll(" .", ".").replaceAll(" ?", "?").replaceAll(" !", "!").trim(),
-      audio: addBase64Header(line.audio, "mp4")
+      audio: addBase64Header(line.audio, "mp3")
    }))
  }
 
@@ -80,11 +83,11 @@ export async function generateAudioStory({
      debug,
      failureMessage: "failed to generate the audio story"
    })
-    return result
+    return BROKEN
  } catch (err) {
    if (neverThrow) {
      console.error(`generateAudioStory():`, err)
-      return []
+      return ""
    } else {
      throw err
    }
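
Both files talk to their Space through Gradio's generic /api/predict route: a POST with a JSON body containing fn_index and a positional data array, answered by a JSON object whose data array holds the outputs. The sketch below restates that call shape on its own; the response handling (data[0] carrying the base64 audio) follows the surrounding code but is an assumption, since this commit does not show that part of the .txt file:

    // Minimal sketch of the Gradio /api/predict call pattern used above.
    // The output indexing (data[0] = base64 audio) is assumed, not shown here.
    async function callGradioPredict(spaceUrl: string, inputs: unknown[]): Promise<string> {
      const res = await fetch(spaceUrl.replace(/\/+$/, "") + "/api/predict", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          fn_index: 0, // <- important: selects the Space's prediction function
          data: inputs, // positional inputs, e.g. [apiKey, prompt, text, "Julian"]
        }),
      })
      if (!res.ok) {
        throw new Error(`Gradio call failed with HTTP ${res.status}`)
      }
      const { data } = await res.json() as { data?: unknown[] }
      if (!data?.[0]) {
        throw new Error(`the returned audio was empty`)
      }
      return data[0] as string // raw base64, still missing its data: header
    }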
src/app/api/v1/edit/dialogues/route.ts CHANGED
@@ -46,6 +46,7 @@ export async function POST(req: NextRequest) {
     // console.log(`[api/generate/dialogues] generating audio..`)
 
     try {
+      // this generates a mp3
       shotDialogueSegment.assetUrl = await generateSpeechWithParlerTTS({
         text: shotDialogueSegment.prompt,
         audioId: getSpeechBackgroundAudioPrompt(shotSegments, clap.entityIndex, ["high quality", "crisp", "detailed"]),
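
For reference, a hypothetical caller of the updated function could look like the sketch below; the text and audioId values are invented for illustration, and the empty-string fallback follows the neverThrow pattern visible in the hunks above:

    // Hypothetical usage sketch; the argument values are made up.
    const assetUrl = await generateSpeechWithParlerTTS({
      text: "Hello from the dialogue generator",
      audioId: "a calm, high quality, crisp and detailed voice",
      neverThrow: true, // expected to yield "" instead of throwing on failure
    })
    // assetUrl should now start with "data:audio/mp3;base64," and can be stored
    // directly as a segment's assetUrl or played back as-is.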