ngxson HF staff committed on
Commit
64db5cc
·
1 Parent(s): da4e284

add checkbox for noise, follow scroll input

Browse files
front/src/components/AudioPlayer.tsx CHANGED
@@ -36,7 +36,7 @@ export const AudioPlayer: React.FC<AudioPlayerProps> = ({ audioBuffer }) => {
36
  href={downloadUrl}
37
  download={'podcast.wav'}
38
  >
39
- Download
40
  </a>
41
  </div>
42
  );
 
36
  href={downloadUrl}
37
  download={'podcast.wav'}
38
  >
39
+ Download WAV
40
  </a>
41
  </div>
42
  );
front/src/components/PodcastGenerator.tsx CHANGED
@@ -1,27 +1,16 @@
1
- import { useEffect, useState } from 'react';
2
  import { AudioPlayer } from './AudioPlayer';
3
- import { Podcast, PodcastTurn } from '../utils/types';
4
  import { parse } from 'yaml';
5
  import {
6
- addNoise,
7
- addSilence,
8
  audioBufferToMp3,
9
- generateAudio,
10
  isBlogMode,
11
- joinAudio,
12
- loadWavAndDecode,
13
  pickRand,
14
  uploadFileToHub,
15
  } from '../utils/utils';
16
 
17
- // taken from https://freesound.org/people/artxmp1/sounds/660540
18
- import openingSoundSrc from '../opening-sound.wav';
19
  import { getBlogComment } from '../utils/prompts';
20
-
21
- interface GenerationStep {
22
- turn: PodcastTurn;
23
- audioBuffer?: AudioBuffer;
24
- }
25
 
26
  const SPEEDS = [
27
  { name: 'slow AF', value: 0.8 },
@@ -107,7 +96,10 @@ export const PodcastGenerator = ({
107
  const [speaker1, setSpeaker1] = useState<string>('');
108
  const [speaker2, setSpeaker2] = useState<string>('');
109
  const [speed, setSpeed] = useState<string>('1.2');
110
- const [addIntroMusic, setAddIntroMusic] = useState<boolean>(false);
 
 
 
111
 
112
  const [blogFilePushToken, setBlogFilePushToken] = useState<string>(
113
  localStorage.getItem('blogFilePushToken') || ''
@@ -126,6 +118,12 @@ export const PodcastGenerator = ({
126
 
127
  useEffect(() => {
128
  setScript(genratedScript);
 
 
 
 
 
 
129
  }, [genratedScript]);
130
 
131
  const generatePodcast = async () => {
@@ -140,51 +138,20 @@ export const PodcastGenerator = ({
140
  let outputWav: AudioBuffer;
141
  try {
142
  const podcast = parseYAML(script);
143
- const { speakerNames, turns } = podcast;
144
- for (const turn of turns) {
145
- // normalize it
146
- turn.nextGapMilisecs =
147
- Math.max(-600, Math.min(300, turn.nextGapMilisecs)) - 100;
148
- turn.text = turn.text
149
- .trim()
150
- .replace(/’/g, "'")
151
- .replace(/“/g, '"')
152
- .replace(/”/g, '"');
153
- }
154
- const steps: GenerationStep[] = turns.map((turn) => ({ turn }));
155
- setNumSteps(steps.length);
156
- setNumStepsDone(0);
157
- for (let i = 0; i < steps.length; i++) {
158
- const step = steps[i];
159
- const speakerIdx = speakerNames.indexOf(
160
- step.turn.speakerName as string
161
- ) as 1 | 0;
162
- const speakerVoice = speakerIdx === 0 ? speaker1 : speaker2;
163
- const url = await generateAudio(
164
- step.turn.text,
165
- speakerVoice,
166
- parseFloat(speed)
167
- );
168
- step.audioBuffer = await loadWavAndDecode(url);
169
- if (i === 0) {
170
- outputWav = step.audioBuffer;
171
- if (addIntroMusic) {
172
- const openingSound = await loadWavAndDecode(openingSoundSrc);
173
- outputWav = joinAudio(openingSound, outputWav!, -2000);
174
- } else {
175
- outputWav = addSilence(outputWav!, true, 200);
176
- }
177
- } else {
178
- const lastStep = steps[i - 1];
179
- outputWav = joinAudio(
180
- outputWav!,
181
- step.audioBuffer,
182
- lastStep.turn.nextGapMilisecs
183
- );
184
  }
185
- setNumStepsDone(i + 1);
186
- }
187
- outputWav = addNoise(outputWav!, 0.002);
188
  setWav(outputWav! ?? null);
189
  } catch (e) {
190
  console.error(e);
@@ -232,6 +199,7 @@ export const PodcastGenerator = ({
232
  )}
233
 
234
  <textarea
 
235
  className="textarea textarea-bordered w-full h-72 p-2"
236
  placeholder="Type your script here..."
237
  value={script}
@@ -295,11 +263,22 @@ export const PodcastGenerator = ({
295
  <input
296
  type="checkbox"
297
  className="checkbox"
298
- checked={addIntroMusic}
299
- onChange={(e) => setAddIntroMusic(e.target.checked)}
 
 
 
 
 
 
 
 
 
 
 
300
  disabled={isGenerating || busy}
301
  />
302
- Add intro music
303
  </div>
304
  </div>
305
 
 
1
+ import { useEffect, useRef, useState } from 'react';
2
  import { AudioPlayer } from './AudioPlayer';
3
+ import { Podcast } from '../utils/types';
4
  import { parse } from 'yaml';
5
  import {
 
 
6
  audioBufferToMp3,
 
7
  isBlogMode,
 
 
8
  pickRand,
9
  uploadFileToHub,
10
  } from '../utils/utils';
11
 
 
 
12
  import { getBlogComment } from '../utils/prompts';
13
+ import { pipelineGeneratePodcast } from '../utils/pipeline';
 
 
 
 
14
 
15
  const SPEEDS = [
16
  { name: 'slow AF', value: 0.8 },
 
96
  const [speaker1, setSpeaker1] = useState<string>('');
97
  const [speaker2, setSpeaker2] = useState<string>('');
98
  const [speed, setSpeed] = useState<string>('1.2');
99
+ const [isAddIntroMusic, setIsAddIntroMusic] = useState<boolean>(false);
100
+ const [isAddNoise, setIsAddNoise] = useState<boolean>(true);
101
+
102
+ const refInput = useRef<HTMLTextAreaElement | null>(null);
103
 
104
  const [blogFilePushToken, setBlogFilePushToken] = useState<string>(
105
  localStorage.getItem('blogFilePushToken') || ''
 
118
 
119
  useEffect(() => {
120
  setScript(genratedScript);
121
+ setTimeout(() => {
122
+ // auto scroll
123
+ if (refInput.current) {
124
+ refInput.current.scrollTop = refInput.current.scrollHeight;
125
+ }
126
+ }, 10);
127
  }, [genratedScript]);
128
 
129
  const generatePodcast = async () => {
 
138
  let outputWav: AudioBuffer;
139
  try {
140
  const podcast = parseYAML(script);
141
+ outputWav = await pipelineGeneratePodcast(
142
+ {
143
+ podcast,
144
+ speaker1,
145
+ speaker2,
146
+ speed: parseFloat(speed),
147
+ isAddIntroMusic,
148
+ isAddNoise,
149
+ },
150
+ (done: number, total: number) => {
151
+ setNumStepsDone(done);
152
+ setNumSteps(total);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  }
154
+ );
 
 
155
  setWav(outputWav! ?? null);
156
  } catch (e) {
157
  console.error(e);
 
199
  )}
200
 
201
  <textarea
202
+ ref={refInput}
203
  className="textarea textarea-bordered w-full h-72 p-2"
204
  placeholder="Type your script here..."
205
  value={script}
 
263
  <input
264
  type="checkbox"
265
  className="checkbox"
266
+ checked={isAddIntroMusic}
267
+ onChange={(e) => setIsAddIntroMusic(e.target.checked)}
268
+ disabled={isGenerating || busy}
269
+ />
270
+ Add intro music (to make it feels like radio)
271
+ </div>
272
+
273
+ <div className="flex items-center gap-2">
274
+ <input
275
+ type="checkbox"
276
+ className="checkbox"
277
+ checked={isAddNoise}
278
+ onChange={(e) => setIsAddNoise(e.target.checked)}
279
  disabled={isGenerating || busy}
280
  />
281
+ Add small background noise (to make it more realistic)
282
  </div>
283
  </div>
284
 
front/src/components/ScriptMaker.tsx CHANGED
@@ -1,4 +1,4 @@
1
- import { useEffect, useState } from 'react';
2
  import { CONFIG } from '../config';
3
  import {
4
  getBlogPrompt,
@@ -55,10 +55,21 @@ export const ScriptMaker = ({
55
  const [thought, setThought] = useState<string>('');
56
  const [isGenerating, setIsGenerating] = useState<boolean>(false);
57
 
 
 
58
  useEffect(() => {
59
  setBusy(isGenerating);
60
  }, [isGenerating]);
61
 
 
 
 
 
 
 
 
 
 
62
  const generate = async () => {
63
  setIsGenerating(true);
64
  setThought('');
@@ -213,6 +224,7 @@ export const ScriptMaker = ({
213
  <textarea
214
  className="textarea textarea-bordered w-full h-24 p-2"
215
  value={thought}
 
216
  readOnly
217
  ></textarea>
218
  </>
 
1
+ import { useEffect, useRef, useState } from 'react';
2
  import { CONFIG } from '../config';
3
  import {
4
  getBlogPrompt,
 
55
  const [thought, setThought] = useState<string>('');
56
  const [isGenerating, setIsGenerating] = useState<boolean>(false);
57
 
58
+ const refThought = useRef<HTMLTextAreaElement | null>(null);
59
+
60
  useEffect(() => {
61
  setBusy(isGenerating);
62
  }, [isGenerating]);
63
 
64
+ useEffect(() => {
65
+ setTimeout(() => {
66
+ // auto scroll
67
+ if (refThought.current) {
68
+ refThought.current.scrollTop = refThought.current.scrollHeight;
69
+ }
70
+ }, 10);
71
+ }, [thought]);
72
+
73
  const generate = async () => {
74
  setIsGenerating(true);
75
  setThought('');
 
224
  <textarea
225
  className="textarea textarea-bordered w-full h-24 p-2"
226
  value={thought}
227
+ ref={refThought}
228
  readOnly
229
  ></textarea>
230
  </>
front/src/utils/pipeline.ts ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { Podcast, PodcastTurn } from './types';
2
+ import {
3
+ addNoise,
4
+ addSilence,
5
+ generateAudio,
6
+ joinAudio,
7
+ loadWavAndDecode,
8
+ } from './utils';
9
+
10
+ // taken from https://freesound.org/people/artxmp1/sounds/660540
11
+ import openingSoundSrc from '../opening-sound.wav';
12
+
/**
 * One unit of work for the podcast pipeline: a script turn together with the
 * audio that was decoded for it.
 */
export interface GenerationStep {
  /** The script turn this step renders. */
  turn: PodcastTurn;
  /** Decoded audio for the turn; left unset until the pipeline has produced it. */
  audioBuffer?: AudioBuffer;
}
17
+
/**
 * Everything `pipelineGeneratePodcast` needs to turn a parsed script into audio.
 */
export interface PodcastGenerationOptions {
  /** Parsed script; its `speakerNames` and `turns` drive the generation. */
  podcast: Podcast;
  /** Voice used for turns spoken by the first entry of `podcast.speakerNames`. */
  speaker1: string;
  /** Voice used for every other turn. */
  speaker2: string;
  /** Speed value forwarded unchanged to `generateAudio`. */
  speed: number;
  /**
   * When true, the bundled opening sound is joined in front of the first turn;
   * when false, `addSilence` is applied to the first turn instead.
   */
  isAddIntroMusic: boolean;
  /** When true, `addNoise` is applied to the finished audio. */
  isAddNoise: boolean;
}
26
+
/**
 * Renders a whole podcast script to a single audio buffer.
 *
 * Turns are synthesized one after another (each needs the running output to be
 * joined onto), and `onUpdate` is invoked once before the first turn and again
 * after every finished turn so the caller can show progress.
 *
 * Note: the turns of `podcast` are normalized IN PLACE (gap clamped and
 * shifted, text trimmed and curly quotes replaced), so the caller's object is
 * modified. Pass a freshly parsed podcast on every call.
 *
 * @param options  Script, voices, speed and post-processing switches.
 * @param onUpdate Progress callback receiving `(done, total)` turn counts.
 * @returns The joined audio of all turns, optionally with intro music and noise.
 * @throws Error('outputWav is undefined') when the podcast has no turns.
 */
export const pipelineGeneratePodcast = async (
  {
    podcast,
    speaker1,
    speaker2,
    speed,
    isAddIntroMusic,
    isAddNoise,
  }: PodcastGenerationOptions,
  onUpdate: (done: number, total: number) => void
): Promise<AudioBuffer> => {
  const { speakerNames, turns } = podcast;

  for (const turn of turns) {
    // Clamp the requested gap to [-600, 300] ms, then shift it by -100 ms.
    turn.nextGapMilisecs =
      Math.max(-600, Math.min(300, turn.nextGapMilisecs)) - 100;
    // Replace typographic quotes with their ASCII counterparts.
    turn.text = turn.text
      .trim()
      .replace(/’/g, "'")
      .replace(/“/g, '"')
      .replace(/”/g, '"');
  }

  const steps: GenerationStep[] = turns.map((turn) => ({ turn }));
  onUpdate(0, steps.length);

  let outputWav: AudioBuffer | undefined;
  // Gap that follows the previously rendered turn; only read from turn 2 on.
  let previousGapMilisecs = 0;

  for (let i = 0; i < steps.length; i++) {
    const step = steps[i];
    // indexOf yields -1 for an unknown speaker name; anything other than the
    // first listed speaker is voiced by speaker2.
    const speakerIdx = speakerNames.indexOf(step.turn.speakerName as string);
    const speakerVoice = speakerIdx === 0 ? speaker1 : speaker2;
    const url = await generateAudio(step.turn.text, speakerVoice, speed);
    const turnAudio = await loadWavAndDecode(url);
    step.audioBuffer = turnAudio;

    if (outputWav === undefined) {
      // First turn: open with the intro sound, or apply addSilence instead.
      if (isAddIntroMusic) {
        const openingSound = await loadWavAndDecode(openingSoundSrc);
        outputWav = joinAudio(openingSound, turnAudio, -2000);
      } else {
        outputWav = addSilence(turnAudio, true, 200);
      }
    } else {
      outputWav = joinAudio(outputWav, turnAudio, previousGapMilisecs);
    }

    previousGapMilisecs = step.turn.nextGapMilisecs;
    onUpdate(i + 1, steps.length);
  }

  // Guard BEFORE post-processing: with zero turns there is no audio, and
  // handing `undefined` to addNoise would fail with an unrelated error.
  if (outputWav === undefined) {
    throw new Error('outputWav is undefined');
  }

  return isAddNoise ? addNoise(outputWav, 0.002) : outputWav;
};
index.html CHANGED
The diff for this file is too large to render. See raw diff