kokoro-podcast-generator

Running

App Files Files Community

ngxson HF staff committed on Feb 18

Commit

64db5cc

1 Parent(s): da4e284

add checkbox for noise, follow scroll input

Browse files

Files changed (5) hide show

front/src/components/AudioPlayer.tsx +1 -1
front/src/components/PodcastGenerator.tsx +41 -62
front/src/components/ScriptMaker.tsx +13 -1
front/src/utils/pipeline.ts +86 -0
index.html +0 -0

front/src/components/AudioPlayer.tsx CHANGED Viewed

@@ -36,7 +36,7 @@ export const AudioPlayer: React.FC<AudioPlayerProps> = ({ audioBuffer }) => {
         href={downloadUrl}
         download={'podcast.wav'}
       >
-        Download
       </a>
     </div>
   );

         href={downloadUrl}
         download={'podcast.wav'}
       >
+        Download WAV
       </a>
     </div>
   );

front/src/components/PodcastGenerator.tsx CHANGED Viewed

@@ -1,27 +1,16 @@
-import { useEffect, useState } from 'react';
 import { AudioPlayer } from './AudioPlayer';
-import { Podcast, PodcastTurn } from '../utils/types';
 import { parse } from 'yaml';
 import {
-  addNoise,
-  addSilence,
   audioBufferToMp3,
-  generateAudio,
   isBlogMode,
-  joinAudio,
-  loadWavAndDecode,
   pickRand,
   uploadFileToHub,
 } from '../utils/utils';
-// taken from https://freesound.org/people/artxmp1/sounds/660540
-import openingSoundSrc from '../opening-sound.wav';
 import { getBlogComment } from '../utils/prompts';
-interface GenerationStep {
-  turn: PodcastTurn;
-  audioBuffer?: AudioBuffer;
-}
 const SPEEDS = [
   { name: 'slow AF', value: 0.8 },
@@ -107,7 +96,10 @@ export const PodcastGenerator = ({
   const [speaker1, setSpeaker1] = useState<string>('');
   const [speaker2, setSpeaker2] = useState<string>('');
   const [speed, setSpeed] = useState<string>('1.2');
-  const [addIntroMusic, setAddIntroMusic] = useState<boolean>(false);
   const [blogFilePushToken, setBlogFilePushToken] = useState<string>(
     localStorage.getItem('blogFilePushToken') || ''
@@ -126,6 +118,12 @@ export const PodcastGenerator = ({
   useEffect(() => {
     setScript(genratedScript);
   }, [genratedScript]);
   const generatePodcast = async () => {
@@ -140,51 +138,20 @@ export const PodcastGenerator = ({
     let outputWav: AudioBuffer;
     try {
       const podcast = parseYAML(script);
-      const { speakerNames, turns } = podcast;
-      for (const turn of turns) {
-        // normalize it
-        turn.nextGapMilisecs =
-          Math.max(-600, Math.min(300, turn.nextGapMilisecs)) - 100;
-        turn.text = turn.text
-          .trim()
-          .replace(/’/g, "'")
-          .replace(/“/g, '"')
-          .replace(/”/g, '"');
-      }
-      const steps: GenerationStep[] = turns.map((turn) => ({ turn }));
-      setNumSteps(steps.length);
-      setNumStepsDone(0);
-      for (let i = 0; i < steps.length; i++) {
-        const step = steps[i];
-        const speakerIdx = speakerNames.indexOf(
-          step.turn.speakerName as string
-        ) as 1 | 0;
-        const speakerVoice = speakerIdx === 0 ? speaker1 : speaker2;
-        const url = await generateAudio(
-          step.turn.text,
-          speakerVoice,
-          parseFloat(speed)
-        );
-        step.audioBuffer = await loadWavAndDecode(url);
-        if (i === 0) {
-          outputWav = step.audioBuffer;
-          if (addIntroMusic) {
-            const openingSound = await loadWavAndDecode(openingSoundSrc);
-            outputWav = joinAudio(openingSound, outputWav!, -2000);
-          } else {
-            outputWav = addSilence(outputWav!, true, 200);
-          }
-        } else {
-          const lastStep = steps[i - 1];
-          outputWav = joinAudio(
-            outputWav!,
-            step.audioBuffer,
-            lastStep.turn.nextGapMilisecs
-          );
         }
-        setNumStepsDone(i + 1);
-      }
-      outputWav = addNoise(outputWav!, 0.002);
       setWav(outputWav! ?? null);
     } catch (e) {
       console.error(e);
@@ -232,6 +199,7 @@ export const PodcastGenerator = ({
           )}
           <textarea
             className="textarea textarea-bordered w-full h-72 p-2"
             placeholder="Type your script here..."
             value={script}
@@ -295,11 +263,22 @@ export const PodcastGenerator = ({
               <input
                 type="checkbox"
                 className="checkbox"
-                checked={addIntroMusic}
-                onChange={(e) => setAddIntroMusic(e.target.checked)}
                 disabled={isGenerating || busy}
               />
-              Add intro music
             </div>
           </div>

+import { useEffect, useRef, useState } from 'react';
 import { AudioPlayer } from './AudioPlayer';
+import { Podcast } from '../utils/types';
 import { parse } from 'yaml';
 import {
   audioBufferToMp3,
   isBlogMode,
   pickRand,
   uploadFileToHub,
 } from '../utils/utils';
 import { getBlogComment } from '../utils/prompts';
+import { pipelineGeneratePodcast } from '../utils/pipeline';
 const SPEEDS = [
   { name: 'slow AF', value: 0.8 },
   const [speaker1, setSpeaker1] = useState<string>('');
   const [speaker2, setSpeaker2] = useState<string>('');
   const [speed, setSpeed] = useState<string>('1.2');
+  const [isAddIntroMusic, setIsAddIntroMusic] = useState<boolean>(false);
+  const [isAddNoise, setIsAddNoise] = useState<boolean>(true);
+  const refInput = useRef<HTMLTextAreaElement | null>(null);
   const [blogFilePushToken, setBlogFilePushToken] = useState<string>(
     localStorage.getItem('blogFilePushToken') || ''
   useEffect(() => {
     setScript(genratedScript);
+    setTimeout(() => {
+      // auto scroll
+      if (refInput.current) {
+        refInput.current.scrollTop = refInput.current.scrollHeight;
+      }
+    }, 10);
   }, [genratedScript]);
   const generatePodcast = async () => {
     let outputWav: AudioBuffer;
     try {
       const podcast = parseYAML(script);
+      outputWav = await pipelineGeneratePodcast(
+        {
+          podcast,
+          speaker1,
+          speaker2,
+          speed: parseFloat(speed),
+          isAddIntroMusic,
+          isAddNoise,
+        },
+        (done: number, total: number) => {
+          setNumStepsDone(done);
+          setNumSteps(total);
         }
+      );
       setWav(outputWav! ?? null);
     } catch (e) {
       console.error(e);
           )}
           <textarea
+            ref={refInput}
             className="textarea textarea-bordered w-full h-72 p-2"
             placeholder="Type your script here..."
             value={script}
               <input
                 type="checkbox"
                 className="checkbox"
+                checked={isAddIntroMusic}
+                onChange={(e) => setIsAddIntroMusic(e.target.checked)}
+                disabled={isGenerating || busy}
+              />
+              Add intro music (to make it feels like radio)
+            </div>
+            <div className="flex items-center gap-2">
+              <input
+                type="checkbox"
+                className="checkbox"
+                checked={isAddNoise}
+                onChange={(e) => setIsAddNoise(e.target.checked)}
                 disabled={isGenerating || busy}
               />
+              Add small background noise (to make it more realistic)
             </div>
           </div>

front/src/components/ScriptMaker.tsx CHANGED Viewed

@@ -1,4 +1,4 @@
-import { useEffect, useState } from 'react';
 import { CONFIG } from '../config';
 import {
   getBlogPrompt,
@@ -55,10 +55,21 @@ export const ScriptMaker = ({
   const [thought, setThought] = useState<string>('');
   const [isGenerating, setIsGenerating] = useState<boolean>(false);
   useEffect(() => {
     setBusy(isGenerating);
   }, [isGenerating]);
   const generate = async () => {
     setIsGenerating(true);
     setThought('');
@@ -213,6 +224,7 @@ export const ScriptMaker = ({
             <textarea
               className="textarea textarea-bordered w-full h-24 p-2"
               value={thought}
               readOnly
             ></textarea>
           </>

+import { useEffect, useRef, useState } from 'react';
 import { CONFIG } from '../config';
 import {
   getBlogPrompt,
   const [thought, setThought] = useState<string>('');
   const [isGenerating, setIsGenerating] = useState<boolean>(false);
+  const refThought = useRef<HTMLTextAreaElement | null>(null);
   useEffect(() => {
     setBusy(isGenerating);
   }, [isGenerating]);
+  useEffect(() => {
+    setTimeout(() => {
+      // auto scroll
+      if (refThought.current) {
+        refThought.current.scrollTop = refThought.current.scrollHeight;
+      }
+    }, 10);
+  }, [thought]);
   const generate = async () => {
     setIsGenerating(true);
     setThought('');
             <textarea
               className="textarea textarea-bordered w-full h-24 p-2"
               value={thought}
+              ref={refThought}
               readOnly
             ></textarea>
           </>

front/src/utils/pipeline.ts ADDED Viewed

	@@ -0,0 +1,86 @@

+import { Podcast, PodcastTurn } from './types';
+import {
+  addNoise,
+  addSilence,
+  generateAudio,
+  joinAudio,
+  loadWavAndDecode,
+} from './utils';
+// taken from https://freesound.org/people/artxmp1/sounds/660540
+import openingSoundSrc from '../opening-sound.wav';
+export interface GenerationStep { // One script turn together with the audio produced for it.
+  turn: PodcastTurn; // the turn whose text is sent to generateAudio
+  audioBuffer?: AudioBuffer; // unset until the turn's audio has been generated and decoded
+}
+export interface PodcastGenerationOptions { // Inputs accepted by pipelineGeneratePodcast.
+  podcast: Podcast; // parsed script; its speakerNames and turns are read
+  speaker1: string; // voice passed to generateAudio for turns spoken by speakerNames[0]
+  speaker2: string; // voice passed to generateAudio for every other turn
+  speed: number; // forwarded unchanged as the third argument of generateAudio
+  isAddIntroMusic: boolean; // true: join the opening sound with the first clip; false: addSilence(clip, true, 200) instead
+  isAddNoise: boolean; // true: pass the final audio through addNoise(..., 0.002)
+}
+/**
+ * Synthesizes every turn of a podcast script and stitches the resulting clips
+ * into a single audio buffer.
+ *
+ * Turns are processed one after another, in script order. Each turn is
+ * normalized on a copy (gap clamped to [-600, 300] and then shifted by -100;
+ * text trimmed and curly quotes replaced by ASCII quotes), so the caller's
+ * `podcast` object is not modified and the same object can be passed again
+ * without the gaps drifting.
+ *
+ * @param options - Script, voices, speed and post-processing switches.
+ * @param onUpdate - Progress callback invoked with `(done, total)`: once
+ *   before the first turn and once after each turn has been joined.
+ * @returns The assembled audio.
+ * @throws Error with message `outputWav is undefined` when the podcast has no
+ *   turns. Failures of `generateAudio` / `loadWavAndDecode` are not caught
+ *   here and propagate to the caller.
+ */
+export const pipelineGeneratePodcast = async (
+  {
+    podcast,
+    speaker1,
+    speaker2,
+    speed,
+    isAddIntroMusic,
+    isAddNoise,
+  }: PodcastGenerationOptions,
+  onUpdate: (done: number, total: number) => void
+): Promise<AudioBuffer> => {
+  const { speakerNames, turns } = podcast;
+
+  // Normalize on copies: mutating the caller's turns would subtract another
+  // 100 from every gap each time the same podcast object is passed in.
+  const steps: GenerationStep[] = turns.map((turn) => ({
+    turn: {
+      ...turn,
+      nextGapMilisecs:
+        Math.max(-600, Math.min(300, turn.nextGapMilisecs)) - 100,
+      text: turn.text
+        .trim()
+        .replace(/’/g, "'")
+        .replace(/“/g, '"')
+        .replace(/”/g, '"'),
+    },
+  }));
+  onUpdate(0, steps.length);
+
+  let outputWav: AudioBuffer | undefined;
+  for (let i = 0; i < steps.length; i++) {
+    const step = steps[i];
+    // indexOf yields -1 for a name missing from speakerNames; such turns, like
+    // every speaker other than the first, are voiced with speaker2.
+    const speakerIdx = speakerNames.indexOf(step.turn.speakerName as string);
+    const speakerVoice = speakerIdx === 0 ? speaker1 : speaker2;
+    const url = await generateAudio(step.turn.text, speakerVoice, speed);
+    const clip = await loadWavAndDecode(url);
+    step.audioBuffer = clip;
+    if (outputWav === undefined) {
+      // First clip: lead in with either the opening sound or a short silence.
+      if (isAddIntroMusic) {
+        const openingSound = await loadWavAndDecode(openingSoundSrc);
+        outputWav = joinAudio(openingSound, clip, -2000);
+      } else {
+        outputWav = addSilence(clip, true, 200);
+      }
+    } else {
+      // The gap between two clips is the one declared on the earlier turn.
+      const lastStep = steps[i - 1];
+      outputWav = joinAudio(outputWav, clip, lastStep.turn.nextGapMilisecs);
+    }
+    onUpdate(i + 1, steps.length);
+  }
+
+  // Guard before post-processing so addNoise never receives an undefined
+  // buffer when the script contains no turns.
+  if (outputWav === undefined) {
+    throw new Error('outputWav is undefined');
+  }
+  return isAddNoise ? addNoise(outputWav, 0.002) : outputWav;
+};

index.html CHANGED Viewed

The diff for this file is too large to render. See raw diff