navalnica committed
Commit 95849c2 · Parent: 8797a8a

stability: use semaphore for tts text LLM preprocessing; retry tts_astream;

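The commit message names two stability measures: the LLM preprocessing of TTS text is throttled with an `asyncio.Semaphore`, and TTS calls are retried. A minimal, self-contained sketch of the throttling half; `prepare_text` and `synthesize` are placeholder coroutines, not functions from this repository:

```python
import asyncio

OPENAI_MAX_PARALLEL = 8  # cap on concurrent LLM preprocessing calls

llm_semaphore = asyncio.Semaphore(OPENAI_MAX_PARALLEL)


async def prepare_text(text: str) -> str:
    """Placeholder for the LLM call that rewrites a phrase before TTS."""
    await asyncio.sleep(0.1)
    return text.strip()


async def synthesize(text: str) -> list[bytes]:
    """Placeholder for the TTS request."""
    await asyncio.sleep(0.1)
    return [text.encode()]


async def process_phrase(text: str) -> list[bytes]:
    # only the LLM preprocessing step is gated by the semaphore,
    # so at most OPENAI_MAX_PARALLEL phrases hit the LLM at once
    async with llm_semaphore:
        prepared = await prepare_text(text)
    return await synthesize(prepared)


async def main() -> None:
    phrases = ["first phrase ", "second phrase "]
    audio = await asyncio.gather(*(process_phrase(p) for p in phrases))
    print([len(chunks) for chunks in audio])


asyncio.run(main())
```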
.gitignore CHANGED
@@ -5,5 +5,6 @@ venv
 .DS_Store
 
 data/books
+data/audiobooks
 
 .env
README.md CHANGED
@@ -11,7 +11,9 @@ python_version: 3.11
 ---
 
 ### Action items
-- check new set of voices
+
+- voices
+  - filter to use only best voices
 - intonations
   - add context
 - audio effects
@@ -19,14 +21,26 @@ python_version: 3.11
   - filter, apply only for long phrases
   - only for narrator?
   - checkbox! make effects great again (no) optional
-- limit on input text size
-- fix UI
-- slides / story
+- stability
+  - add limit on input text size (5000 chars)
+- improve UI
+  - add error box
+  - add samples
+  - show character parts
+  - remove file upload pane
+  - labels on how long to wait
+  - labels describing components
+  - header and description
+- prepare slides / story
+- testing
+  - eval current execution time
+  - test on different text inputs
 - optimizations
+  - generate audio effects asynchronously
   - combine sequential phrases of same character in single phrase
   - support large texts. use batching. problem: how to ensure same characters?
-    - can detect characters in first prompt, then split text in each batch into character phrases
-    - probably split large phrases into smaller ones
-  - identify unknown characters
-  - use LLM to recognize characters for a given text and provide descriptions detailed enough to select appropriate voice
+    - can detect characters in first prompt, then split text in each batch into character phrases
+    - probably split large phrases into smaller ones
+  - identify unknown characters
+  - use LLM to recognize characters for a given text and provide descriptions detailed enough to select appropriate voice
 
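One optimization item above, combining sequential phrases of the same character into a single phrase, reduces the number of TTS requests. A minimal sketch of that merge step, assuming a hypothetical list of (character, text) pairs rather than this repository's data structures:

```python
from itertools import groupby


def merge_consecutive_phrases(phrases: list[tuple[str, str]]) -> list[tuple[str, str]]:
    """Merge adjacent phrases spoken by the same character.

    `phrases` is a hypothetical list of (character, text) pairs in reading order.
    """
    merged = []
    for character, group in groupby(phrases, key=lambda pair: pair[0]):
        merged.append((character, " ".join(text for _, text in group)))
    return merged


print(merge_consecutive_phrases([
    ("narrator", "He paused."),
    ("narrator", "Then he spoke."),
    ("nick", "Don't believe everything you hear."),
]))
# [('narrator', 'He paused. Then he spoke.'), ('nick', "Don't believe everything you hear.")]
```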
data/11labs_available_tts_voices.reviewed.csv ADDED
@@ -0,0 +1,35 @@
1
+ voice_id,name,preview_url,manual_quality_review,owner_id,permission_on_resource,is_legacy,is_mixed,accent,description,age,gender,category,language,descriptive
2
+ 8opUN7sGOKbyojnjvNdl,Angela,https://storage.googleapis.com/eleven-public-prod/KI49vYhSgygxHVkjNTjMnJEaS6H2/voices/V8OwijRGPjSNuaZo64vg/6923fbb2-a713-4d42-a590-b8999b74f644.mp3,ok,,admin,FALSE,FALSE,american,,middle_aged,female,conversational,,confident
3
+ yu4eXTP5aod8KAQzTI3T,"Claudia - Credible, Competent & Authentic",https://storage.googleapis.com/eleven-public-prod/database/user/Db6dCWW3pFVb0DPq7Q5s86VmuNQ2/voices/yu4eXTP5aod8KAQzTI3T/8zgX5ljCJ8kmBL0Q8rx7.mp3,ok,,admin,FALSE,FALSE,american,,middle_aged,female,conversational,en,confident
4
+ wL9XWq1lRS2ZxgYArGOm,Freya - Concise and Wellspoken,https://storage.googleapis.com/eleven-public-prod/ofJ3ST4rvZcJUjpc7t5B7gqcCz93/voices/eWgG9MAbD9umBchcWP1B/cebbd3e7-2460-43d7-8ae3-d20160bc3e44.mp3,medium,,admin,FALSE,FALSE,american,,middle_aged,female,conversational,,formal
5
+ GUM0hIboeFNpFw8Le7kY,Amber,https://storage.googleapis.com/eleven-public-prod/Eq8tyIYOx5PoyXO0aMbKV4xG1FW2/voices/XX9E8l5jsdMf8kszvsyX/b8a2aa2c-e3dc-42d0-b943-a2eb5d760315.mp3,bad,,admin,FALSE,FALSE,american,,middle_aged,female,conversational,,calm
6
+ MS8BX0O8omm5Ie6mL8v2,Kay - 65 years old,https://storage.googleapis.com/eleven-public-prod/8UfhnmuNoPSOxBcoJqW3NsegvUG2/voices/r8SplNeU9vfxsNrJFstn/823adabf-a99a-4537-ac95-90e2ee315d2d.mp3,ok,,admin,FALSE,FALSE,british,,old,female,conversational,,calm
7
+ NNYB2AxILPYTs53mGYij,Jenn,https://storage.googleapis.com/eleven-public-prod/63YWf9FcbyS5PpTayiUUxnQ95Cj2/voices/QxTS99N2QuIZNwTkGB5M/518786bf-ee01-400e-a690-c75193fee06a.mp3,bad,,admin,FALSE,FALSE,american,,old,female,conversational,,serious
8
+ RigKMMlryPP9jHT50Jzl,Sybil,https://storage.googleapis.com/eleven-public-prod/LPSMwp0QPIPEu76c5UE6a2Tg6kU2/voices/bRp2SymzfuLvibautVoX/eb9ad599-df42-4447-8d85-b19ac1333018.mp3,bad,,admin,FALSE,FALSE,australian,,old,female,conversational,,casual
9
+ v921MVi0ixhdc1CW2WxF,Aspexia - Grand & Clear,https://storage.googleapis.com/eleven-public-prod/lA5gT7FFPLVt66XJyVjpVR1ljPA2/voices/ZV3F48j5cMfp5ZlnpCs4/eb367cb4-5e11-4cbd-8fbe-c417bf21ffd3.mp3,ok,,admin,FALSE,FALSE,american,,old,female,conversational,,casual
10
+ yxHHj4hKJKKTyvUpnbds,Heather - Mature British lady,https://storage.googleapis.com/eleven-public-prod/28U448fBuufmkz5lMBQX6JLm8sw2/voices/f3SgjPzocHWjSn3VXeKy/c9a06d6e-50fa-4ce3-9e7f-f6fd8c252898.mp3,ok,,admin,FALSE,FALSE,british,,old,female,conversational,,mature
11
+ cgSgspJ2msm6clMCkdW9,Jessica,https://storage.googleapis.com/eleven-public-prod/premade/voices/cgSgspJ2msm6clMCkdW9/56a97bf8-b69b-448f-846c-c3a11683d45a.mp3,ok,,,FALSE,FALSE,american,expressive,young,female,conversational,,
12
+ 1btZhL2wthuOhUqvI0bB,Emily - pleasant teen voice,https://storage.googleapis.com/eleven-public-prod/opnveFdsOUSqSDvVg1e9RtN4lWG2/voices/tZ7CAYevCBDjnoUrMFli/430cc09c-257d-41cd-acdd-5177e05a193a.mp3,ok,,admin,FALSE,FALSE,british,,young,female,conversational,,pleasant
13
+ K5DRk4s8l1HFKsggS25u,Adrianna,https://storage.googleapis.com/eleven-public-prod/63YWf9FcbyS5PpTayiUUxnQ95Cj2/voices/ZWxdaecYtNJxT2ist5K7/201af84a-5d00-4c9f-999b-625bb8dab137.mp3,ok,,admin,FALSE,FALSE,australian,,young,female,conversational,,pleasant
14
+ PoHUWWWMHFrA8z7Q88pu,Miranda,https://storage.googleapis.com/eleven-public-prod/ZXqeDajThsaYR96DSywtY94M8Dy1/voices/PoHUWWWMHFrA8z7Q88pu/403a094a-c4e4-431f-b96a-7c63ae76508d.mp3,ok,,admin,FALSE,FALSE,american,,young,female,conversational,en,cute
15
+ Qo4wB0XcrmHFXFyP38mc,Sasha - Soothing and Chill,https://storage.googleapis.com/eleven-public-prod/u9529nqLipaEPA53CKpxKF7nqLH2/voices/mnp8rIZc6Kiem1xaJDaM/95b33e4a-e41b-4fda-9eb8-cab3039bce60.mp3,ok,,admin,FALSE,FALSE,american,,young,female,conversational,,chill
16
+ U0gWepDCHKmmexbBxeSX,Alicia - Black Female,https://storage.googleapis.com/eleven-public-prod/laura1cGHOTJh1Ah99tjAoTLy7z1/voices/NBcGlQxeT5lFe7hgzwTR/f69f1f70-26a0-403c-915d-1992b6d8cfd0.mp3,ok,,admin,FALSE,FALSE,american,,young,female,conversational,,crisp
17
+ rfkTsdZrVWEVhDycUYn9,Shelby,https://storage.googleapis.com/eleven-public-prod/database/user/amWos2WP7hRs0yPBtA0OwjuzHAH2/voices/rfkTsdZrVWEVhDycUYn9/iiFLm9bEQ83kiP0bExhG.mp3,ok,,admin,FALSE,FALSE,british,,young,female,conversational,en,pleasant
18
+ teAOBFSeynXfbyNgq6Ec,Ally - Curious and Chill,https://storage.googleapis.com/eleven-public-prod/ofJ3ST4rvZcJUjpc7t5B7gqcCz93/voices/mW6Z7SZeRqOvuzCTgxyk/2132b3d1-fdcf-4036-85c9-bdd003941130.mp3,bad,,admin,FALSE,FALSE,american,,young,female,conversational,,chill
19
+ IKne3meq5aSn9XLyUdCD,Charlie,https://storage.googleapis.com/eleven-public-prod/premade/voices/IKne3meq5aSn9XLyUdCD/102de6f2-22ed-43e0-a1f1-111fa75c5481.mp3,ok,,,FALSE,FALSE,australian,natural,middle_aged,male,conversational,,
20
+ cjVigY5qzO86Huf0OWal,Eric,https://storage.googleapis.com/eleven-public-prod/premade/voices/cjVigY5qzO86Huf0OWal/d098fda0-6456-4030-b3d8-63aa048c9070.mp3,medium,,,FALSE,FALSE,american,friendly,middle_aged,male,conversational,,
21
+ BFUk567oZITYKwOqegEq,Riley - loud and intense,https://storage.googleapis.com/eleven-public-prod/UwDtqCF44YaL77wxb8DVQlHT5Gp1/voices/60G0VdAP3WBQQbE6tSkT/ecc00def-2543-4b50-b93d-5d4b6c7dca33.mp3,very bad,,admin,FALSE,FALSE,american,,middle_aged,male,conversational,,intense
22
+ EkuRA6XL9UbflTWEtNbQ,Middle age Southern Male,https://storage.googleapis.com/eleven-public-prod/0gh9bWjaVmNOvQJVcRddxeYIS2z1/voices/t5Oo3tZSuEZt6BD2VGV4/5c0177c5-46bd-414c-abfd-6cd6d5677f08.mp3,medium,,admin,FALSE,FALSE,american,,middle_aged,male,conversational,,casual
23
+ MP7UPhn7eVWqCGJGIh6Q,Aaron Patrick - Fun-Upbeat,https://storage.googleapis.com/eleven-public-prod/database/user/ktIm5hvnGlc2TVlwOiZmbmw9kHy2/voices/MP7UPhn7eVWqCGJGIh6Q/NFiMZncqQJ0IFTzFGbwQ.mp3,ok,,admin,FALSE,FALSE,american,,middle_aged,male,conversational,en,upbeat
24
+ RPEIZnKMqlQiZyZd1Dae,Christopher - friendly guy next door,https://storage.googleapis.com/eleven-public-prod/database/user/HURZYaLa4shZEqiT75qd5tyEsSr1/voices/RPEIZnKMqlQiZyZd1Dae/FwLtZ4mCBHV0eLjbUM8Y.mp3,ok,,admin,FALSE,FALSE,american,,middle_aged,male,conversational,en,casual
25
+ Tx7VLgfksXHVnoY6jDGU,"Conversational Joe - A chatty casual voice, British RP male",https://storage.googleapis.com/eleven-public-prod/database/user/wf6Rmje05ZbqeHYfK82ThsPKouC2/voices/Tx7VLgfksXHVnoY6jDGU/ab4X4F9RcNSeTwBS8KS9.mp3,ok,,admin,FALSE,FALSE,british,,middle_aged,male,conversational,en,casual
26
+ WLKp2jV6nrS8aMkPPDRO,Paul - Australian Professional Presenter,https://storage.googleapis.com/eleven-public-prod/custom/voices/WLKp2jV6nrS8aMkPPDRO/eaTiwieru6XA3d8ocVFK.mp3,ok,,admin,FALSE,FALSE,australian,,middle_aged,male,conversational,en,professional
27
+ r5Al4oLBZaQ3SY9ncbVG,"Kass - Energetic, Casual, Engaging",https://storage.googleapis.com/eleven-public-prod/R51RaBhjNqbqLhP6cvEePomgTZX2/voices/r5Al4oLBZaQ3SY9ncbVG/18422b0c-f056-467c-997e-6a13854baea6.mp3,ok,,admin,FALSE,FALSE,american,,middle_aged,male,conversational,en,excited
28
+ NWQswDS4aKfvBwYabvGT,Călin OZ,https://storage.googleapis.com/eleven-public-prod/FJwaL91a8SZIMeJz4Nx1ea8F1N63/voices/fi2r0fw0bQUShZBKDyrI/ba51edb4-2776-410b-aee9-597a25ec7492.mp3,bad,,admin,FALSE,FALSE,american,,old,male,conversational,,wise
29
+ QT02s1mEPA02kfid1nTk,"Donald - American, 70 years old",https://storage.googleapis.com/eleven-public-prod/8UfhnmuNoPSOxBcoJqW3NsegvUG2/voices/MIer4gjk6bcEMGSRRkOs/06d81bc4-5cfa-4f46-b269-6cb655a45ad9.mp3,ok,,admin,FALSE,FALSE,american,,old,male,conversational,,intense
30
+ eAyDL3LKBhCxNF21PbuQ,Mike - teacher,https://storage.googleapis.com/eleven-public-prod/ZNTWuC7IYQQWqzzeKbpLS0FeplI3/voices/Au1xn1q4XOiitEswfIzo/56b03843-3723-4617-a486-bbedb0f84828.mp3,bad,,admin,FALSE,FALSE,american,,old,male,conversational,,pleasant
31
+ fQpAewJVtu0DoZkQ58bW,Old man with a soft voice,https://storage.googleapis.com/eleven-public-prod/uRcoDhI6DVf8aT39RswuDWK2C0P2/voices/xDBqezbIeYil8jkpc3c0/d2771c6e-0383-4050-bfa8-84a963baa098.mp3,bad,,admin,FALSE,FALSE,american,,old,male,conversational,,calm
32
+ z3CSZSXDkv7fvwqfOrZN,Bryan,https://storage.googleapis.com/eleven-public-prod/Y3yb5Fdg8aVDi9n3MLjYB1v0tMF2/voices/0HpbeTZr9DdE1r0eGVn1/4c4cd4a7-c2ea-45d3-b402-55fc875240e0.mp3,medium,,admin,FALSE,FALSE,american,,old,male,conversational,,confident
33
+ 1RVpBInY9YUYMLSUQReV,Cody McAvoy,https://storage.googleapis.com/eleven-public-prod/EzfotYSdpJdL97IDcxmmjJAFhrB3/voices/1RVpBInY9YUYMLSUQReV/f4807088-b6ff-48fa-83ca-a060fe724d10.mp3,ok,,admin,FALSE,FALSE,american,,young,male,conversational,en,pleasant
34
+ IFtWI8YHaBY8I7EB217u,ADAM,https://storage.googleapis.com/eleven-public-prod/21EHOpCpDMOQhKNMRI94rNioulW2/voices/4KEs1SFpz8x6fuT7MB5B/e0992f5a-2528-4a2a-981c-0470ccb0e935.mp3,bad,,admin,FALSE,FALSE,american,,young,male,conversational,,pleasant
35
+ xSI29a9HAKdsWv3idXSN,Ali,https://storage.googleapis.com/eleven-public-prod/Je2OUP5SdgSZhG40J7umHz4bhXB3/voices/IlNTfQEoyv1KrV6Jbb0c/a4ba8772-1400-4910-adee-9e0dd65e2b74.mp3,ok,,admin,FALSE,FALSE,american,,young,male,conversational,,intense
data/samples_to_split.py CHANGED
@@ -96,3 +96,145 @@ up on us and first thing you know—"
 
 "Don't believe everything you hear, Nick," he advised me.
 """
+
+WONDERFUL_CHRISTMAS_1 = """\
+"Did the "Snow" go down? Who knows?" she asked, sobbing and crying, and
+quite broken down by hearing the fact put into words.
+
+"I'm sorry we said a word," said Kate, crying with Mrs. Dobson; and,
+dropping down upon a footstool, she laid her head in Mrs. Dobson's lap.
+
+"You will please forgive us, won't you?" spoke up the boy in the bed.
+
+"Forgive you! Yes, indeed. I'm glad you thought enough about my
+poor—about Captain Dobson to talk of him at all, and to-day, too! Why, I
+always go down the harbor to-day; it is just thirty-seven years ago
+to-day since we were married and the ship went on its voyage."
+
+"And you will go this afternoon, won't you?" questioned Kate.
+
+"I mustn't," she said simply.
+
+"Why not?"
+
+"Why, I've got a little boy of my own to look after to-day, don't you
+see?" she said, suddenly smiling, with an uplifted face.
+"""
+
+# lots of narrator speech
+WONDERFUL_CHRISTMAS_2 = """\
+Harry Cornwall ran out from the tent to watch the flight of Frank
+Hallock, who ran, frightened by the tiger, and as the boy disappeared
+from sight over the hilltop, Harry determined to try and find time
+enough that very evening to run up to the big house and tell all he knew
+about the necklace that somebody had tucked into the pocket of the
+waistcoat that fell to Jack Flibbit after the great fire. Harry could
+not persuade himself that two little girls in the land had put two
+necklaces, with the same mark, into pockets to go "Out West." Harry's
+name was called in a loud tone, and he ran to obey the call at about the
+same moment that Frank Hallock reached the cornfield and picked up his
+hoe.
+
+Frank counted the hills and the rows, and scarcely looked up until the
+sound of the one o'clock train, on the New Haven Railroad, passing
+through the town, told to him how short the hour had been.
+
+Then Neptune came home. Frank knew that Kate was on the carriage-road
+that ran past the field, but he would not look up, not even when he
+heard her cheery call to him; so Neptune and Kate went on their way, and
+presently the welcome sound of the dinner-horn was heard.
+
+Frank did not throw down his hoe, but fell to work harder than ever. In
+five minutes' time the horn was sounded again, and on looking up, Frank
+beheld Kate standing on the veranda—she was waving her hat to attract
+his attention. He was inclined to make a martyr of himself just then, so
+he waved his straw hat in return, and immediately resolved to "hoe away
+like a major."
+
+In fifteen minutes more Kate was making her way for the second time that
+day through Frank's cornfield. As she drew near, she called out, "Frank!
+Frank! why in the world don't you come to dinner? There is a gentleman
+at table who came to see papa on business, and I ran away after the
+soup—I couldn't eat my dinner one bit, without you."
+
+"You'll _have_ to, I reckon," returned Frank; "a poor fellow, like me,
+who has to hoe corn all day, can't stop to eat."
+
+"O, Frank Hallock! _for shame!_" cried Kate, putting down her indignant
+foot without being able to make noise enough about it to disturb an
+earthworm.
+
+"It's _true_," responded Frank, pitching into the next hill with all his
+might.
+
+"It is _not_ true," cried Kate; "and if just running off to _look_ at
+the circus pass by makes you say such things, I am glad you can't go to
+see it."\
+"""
174
+
175
+
176
+ ARCH_WIKI_1 = """\
177
+ This document is an annotated index of popular articles and important information for improving and adding functionalities to the installed Arch system. Readers are assumed to have read and followed the Installation guide to obtain a basic Arch Linux installation. Having read and understood the concepts explained in #System administration and #Package management is required for following the other sections of this page and the other articles in the wiki.
178
+ System administration
179
+
180
+ This section deals with administrative tasks and system management. See Core utilities and Category:System administration for more.
181
+ Users and groups
182
+
183
+ A new installation leaves you with only the superuser account, better known as "root". Logging in as root for prolonged periods of time, possibly even exposing it via SSH on a server, is insecure. Instead, you should create and use unprivileged user account(s) for most tasks, only using the root account for system administration. See Users and groups#User management for details.
184
+
185
+ Users and groups are a mechanism for access control; administrators may fine-tune group membership and ownership to grant or deny users and services access to system resources. Read the Users and groups article for details and potential security risks.
186
+ Security
187
+
188
+ Read Security for recommendations and best practices on hardening the system.
189
+
190
+ For a list of applications to allow running commands or starting an interactive shell as another user (e.g. root), see List of applications/Security#Privilege elevation.
191
+ Service management
192
+
193
+ Arch Linux uses systemd as the init process, which is a system and service manager for Linux. For maintaining your Arch Linux installation, it is a good idea to learn the basics about it.
194
+
195
+ Interaction with systemd is done through the systemctl command. See systemd#Basic systemctl usage for more information.
196
+
197
+ A logging system is also provided, with the command journalctl. See journal for more information.
198
+ System maintenance
199
+
200
+ Arch is a rolling release system and has rapid package turnover, so users have to take some time to do system maintenance.
201
+ Package management
202
+
203
+ This section contains helpful information related to package management. See FAQ#Package management and Category:Package management for more.
204
+ Note: It is imperative to keep up to date with changes in Arch Linux that require manual intervention before upgrading your system. Subscribe to the arch-announce mailing list or the recent news RSS feed. Alternatively, check the front page Arch news every time before you update.
205
+ pacman
206
+
207
+ pacman is the Arch Linux package manager: it is highly encouraged to become familiar with it before reading any other articles.
208
+
209
+ To enable downloading packages in parallel, see pacman#Enabling parallel downloads.
210
+
211
+ For long term handling of cached packages, see pacman#Cleaning the package cache.
212
+
213
+ See pacman/Tips and tricks for suggestions on how to improve your interaction with pacman and package management in general.
214
+ Repositories
215
+
216
+ See the Official repositories article for details about the purpose of each officially maintained repository.
217
+
218
+ If you plan on using 32-bit applications, you will want to enable the multilib repository.
219
+
220
+ The Unofficial user repositories article lists several other unsupported repositories.
221
+
222
+ You may consider installing the pkgstats service.
223
+ Mirrors
224
+
225
+ Visit the Mirrors article for steps on taking full advantage of using the fastest and most up to date mirrors of the official repositories. As explained in the article, a particularly good advice is to routinely check the Mirror Status page for a list of mirrors that have been recently synced. This can be automated with Reflector.
226
+ Arch Build System
227
+
228
+ Ports is a system initially used by BSD distributions consisting of build scripts that reside in a directory tree on the local system. Simply put, each port contains a script within a directory intuitively named after the installable third-party application.
229
+
230
+ The Arch build system offers the same functionality by providing build scripts called PKGBUILDs, which are populated with information for a given piece of software: integrity hashes, project URL, version, license and build instructions. These PKGBUILDs are parsed by makepkg, the actual program that generates packages that are cleanly manageable by pacman.
231
+
232
+ Every package in the repositories along with those present in the AUR are subject to recompilation with makepkg.
233
+ Arch User Repository
234
+
235
+ While the Arch Build System allows the ability of building software available in the official repositories, the Arch User Repository (AUR) is the equivalent for user submitted packages. It is an unsupported repository of build scripts accessible through the web interface or through the Aurweb RPC interface.
236
+ Booting
237
+
238
+ This section contains information pertaining to the boot process. An overview of the Arch boot process can be found at Arch boot process. See Category:Boot process for more.
239
+ Hardware auto-recognition
240
+ """
src/audio_generators.py CHANGED
@@ -9,11 +9,14 @@ from langchain_community.callbacks import get_openai_callback
 from pydub import AudioSegment
 
 from src.lc_callbacks import LCMessageLoggerAsync
-from src.tts import tts_astream, sound_generation_astream
+from src.tts import tts_astream_consumed, sound_generation_astream
 from src.utils import auto_retry, consume_aiter
-from src.emotions.generation import EffectGeneratorAsync
+from src.emotions.generation import (
+    EffectGeneratorAsync,
+    TextPreparationForTTSTaskOutput,
+)
 from src.emotions.utils import add_overlay_for_audio
-from src.config import ELEVENLABS_MAX_PARALLEL, logger
+from src.config import ELEVENLABS_MAX_PARALLEL, logger, OPENAI_MAX_PARALLEL
 from src.text_split_chain import SplitTextOutput
 
 
@@ -28,8 +31,8 @@ class AudioGeneratorSimple:
 
         async def tts_astream_with_semaphore(voice_id: str, text: str):
            async with semaphore:
-                iter_ = tts_astream(voice_id=voice_id, text=text)
-                bytes_ = await consume_aiter(iter_)
+                bytes_ = await tts_astream_consumed(voice_id=voice_id, text=text)
+                # bytes_ = await consume_aiter(iter_)
                 return bytes_
 
         tasks = []
@@ -77,12 +80,10 @@ class AudioGeneratorWithEffects:
         )
         logger.info(f"{generate_effects = }, {lines_for_sound_effect = }")
 
-        # Step 1: Process and modify text
-        modified_texts, sound_emotion_results = await self._process_and_modify_text(
+        modified_texts, sound_emotion_results = await self._prepare_text_for_tts(
             text_split, lines_for_sound_effect
         )
 
-        # Step 2: Generate TTS audio for modified text
         tts_results, self.temp_files = await self._generate_tts_audio(
             text_split, modified_texts, character_to_voice
         )
@@ -93,8 +94,12 @@ class AudioGeneratorWithEffects:
         )
 
         # Step 4: Merge audio files
-        normalized_audio_chunks = self._normalize_audio_chunks(audio_chunks, self.temp_files)
-        final_output = self._merge_audio_files(normalized_audio_chunks, save_path=out_path)
+        normalized_audio_chunks = self._normalize_audio_chunks(
+            audio_chunks, self.temp_files
+        )
+        final_output = self._merge_audio_files(
+            normalized_audio_chunks, save_path=out_path
+        )
 
         # Clean up temporary files
         self._cleanup_temp_files(self.temp_files)
@@ -105,34 +110,51 @@ class AudioGeneratorWithEffects:
         """Select % of the lines randomly for sound effect generation."""
         return random.sample(range(num_lines), k=int(fraction * num_lines))
 
-    async def _process_and_modify_text(
+    async def _prepare_text_for_tts(
         self, text_split: SplitTextOutput, lines_for_sound_effect: list[int]
     ) -> tuple[list[dict], list[dict]]:
-        """Process the text by modifying it and generating tasks for sound effects."""
-        tasks_for_text_modification = []
-        sound_emotion_tasks = []
+        semaphore = asyncio.Semaphore(OPENAI_MAX_PARALLEL)
+
+        async def run_task_with_semaphore(func, **params):
+            async with semaphore:
+                outputs = await func(**params)
+                return outputs
+
+        task_emotion_code = "add_emotion"
+        task_effects_code = "add_effects"
+
+        tasks = []
 
         for idx, character_phrase in enumerate(text_split.phrases):
             character_text = character_phrase.text.strip().lower()
 
-            # Add text emotion modification task
-            tasks_for_text_modification.append(
-                self.effect_generator.add_emotion_to_text(character_text)
+            tasks.append(
+                run_task_with_semaphore(
+                    func=self.effect_generator.add_emotion_to_text,
+                    text=character_text,
+                )
             )
 
             # If this line needs sound effects, generate parameters
             if idx in lines_for_sound_effect:
-                sound_emotion_tasks.append(
-                    self.effect_generator.generate_parameters_for_sound_effect(
-                        character_text
+                tasks.append(
+                    run_task_with_semaphore(
+                        func=self.effect_generator.generate_parameters_for_sound_effect,
+                        text=character_text,
                    )
                )
 
-        # Await tasks for text modification and sound effects
-        modified_texts = await asyncio.gather(*tasks_for_text_modification)
-        sound_emotion_results = await asyncio.gather(*sound_emotion_tasks)
+        tasks_results: list[TextPreparationForTTSTaskOutput] = []
+        tasks_results = await asyncio.gather(*tasks)
+
+        emotion_tasks_results = [
+            x.output for x in tasks_results if x.task == task_emotion_code
+        ]
+        effects_tasks_results = [
+            x.output for x in tasks_results if x.task == task_effects_code
+        ]
 
-        return modified_texts, sound_emotion_results
+        return emotion_tasks_results, effects_tasks_results
 
     async def _generate_tts_audio(
         self,
@@ -146,8 +168,10 @@ class AudioGeneratorWithEffects:
 
         async def tts_astream_with_semaphore(voice_id: str, text: str, params: dict):
            async with self.semaphore:
-                iter_ = tts_astream(voice_id=voice_id, text=text, params=params)
-                bytes_ = await consume_aiter(iter_)
+                bytes_ = await tts_astream_consumed(
+                    voice_id=voice_id, text=text, params=params
+                )
+                # bytes_ = await consume_aiter(iter_)
                 return bytes_
 
         for idx, (modified_text, character_phrase) in enumerate(
@@ -240,7 +264,9 @@ class AudioGeneratorWithEffects:
 
         return normalized_files
 
-    def _merge_audio_files(self, audio_filenames: list[str], save_path: Path | None = None) -> Path:
+    def _merge_audio_files(
+        self, audio_filenames: list[str], save_path: Path | None = None
+    ) -> Path:
         """Helper function to merge multiple audio files into one."""
         combined = AudioSegment.from_file(audio_filenames[0])
         for filename in audio_filenames[1:]:
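The reworked `_prepare_text_for_tts` above runs two kinds of LLM tasks through a single `asyncio.gather` and then splits the results by a task tag. A reduced, standalone sketch of that routing idea; the `Tagged` model and the two dummy coroutines are illustrative stand-ins, not the repository's classes:

```python
import asyncio
from typing import Any

from pydantic import BaseModel


class Tagged(BaseModel):
    task: str
    output: Any


async def add_emotion(text: str) -> Tagged:
    # dummy "add emotion" task
    return Tagged(task="add_emotion", output=text.upper())


async def add_effects(text: str) -> Tagged:
    # dummy "sound effect parameters" task
    return Tagged(task="add_effects", output={"prompt": text})


async def main() -> None:
    texts = ["hello there", "a door slams"]
    tasks = [add_emotion(t) for t in texts] + [add_effects(texts[1])]
    results = await asyncio.gather(*tasks)  # results keep submission order
    emotions = [r.output for r in results if r.task == "add_emotion"]
    effects = [r.output for r in results if r.task == "add_effects"]
    print(emotions, effects)


asyncio.run(main())
```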
src/builder.py CHANGED
@@ -9,10 +9,8 @@ from src.utils import GPTModels
 
 class AudiobookBuilder:
 
-    def __init__(self) -> None:
-        self.voice_selector = VoiceSelector(
-            csv_table_fp="data/11labs_available_tts_voices.csv"
-        )
+    def __init__(self):
+        self.voice_selector = VoiceSelector()
         self.audio_generator = AudioGeneratorWithEffects()
 
     async def split_text(self, text: str) -> SplitTextOutput:
src/config.py CHANGED
@@ -12,4 +12,8 @@ ELEVENLABS_API_KEY = os.environ["ELEVEN_LABS_API_KEY"]
 
 FILE_SIZE_MAX = 0.5  # in mb
 
+OPENAI_MAX_PARALLEL = 8  # empirically set
 ELEVENLABS_MAX_PARALLEL = 15  # current limitation of available subscription
+
+# VOICES_CSV_FP = "data/11labs_available_tts_voices.csv"
+VOICES_CSV_FP = "data/11labs_available_tts_voices.reviewed.csv"
src/emotions/generation.py CHANGED
@@ -1,7 +1,9 @@
1
  import json
 
2
  from abc import ABC, abstractmethod
3
 
4
  import openai
 
5
  from requests import HTTPError
6
 
7
  from src.config import OPENAI_API_KEY, logger
@@ -16,105 +18,110 @@ from .prompts import (
16
  from .utils import get_audio_duration
17
 
18
 
 
 
 
 
 
19
  class AbstractEffectGenerator(ABC):
20
  @abstractmethod
21
- def generate_text_for_sound_effect(self, text) -> dict:
22
  pass
23
 
24
  @abstractmethod
25
- def generate_parameters_for_sound_effect(
26
- self, text: str, generated_audio_file: str
27
- ) -> dict:
28
  pass
29
 
30
  @abstractmethod
31
- def add_emotion_to_text(self, text: str) -> dict:
32
  pass
33
 
34
 
35
- class EffectGenerator(AbstractEffectGenerator):
36
- def __init__(self, predict_duration: bool = True, model_type: str = "gpt-4o"):
37
- self.client = openai.OpenAI(api_key=OPENAI_API_KEY)
38
- self.sound_effect_prompt = (
39
- SOUND_EFFECT_GENERATION
40
- if predict_duration
41
- else SOUND_EFFECT_GENERATION_WITHOUT_DURATION_PREDICTION
42
- )
43
- self.text_modification_prompt = TEXT_MODIFICATION_WITH_SSML
44
- self.model_type = model_type
45
- logger.info(
46
- f"EffectGenerator initialized with model_type: {model_type}, predict_duration: {predict_duration}"
47
- )
48
-
49
- @auto_retry
50
- def generate_text_for_sound_effect(self, text: str) -> dict:
51
- """Generate sound effect description and parameters based on input text."""
52
- try:
53
- completion = self.client.chat.completions.create(
54
- model=self.model_type,
55
- messages=[
56
- {"role": "system", "content": self.sound_effect_prompt},
57
- {"role": "user", "content": text},
58
- ],
59
- response_format={"type": "json_object"},
60
- )
61
- # Extracting the output
62
- chatgpt_output = completion.choices[0].message.content
63
-
64
- # Parse and return JSON response
65
- output_dict = json.loads(chatgpt_output)
66
- logger.info(
67
- "Successfully generated sound effect description: %s", output_dict
68
- )
69
- return output_dict
70
-
71
- except json.JSONDecodeError as e:
72
- logger.error("Failed to parse the output text as JSON: %s", e)
73
- raise RuntimeError(
74
- f"Error: Failed to parse the output text as JSON.\nOutput: {chatgpt_output}"
75
- )
76
-
77
- except HTTPError as e:
78
- logger.error("HTTP error occurred: %s", e)
79
- raise RuntimeError(f"HTTP Error: {e}")
80
-
81
- except Exception as e:
82
- logger.error("Unexpected error occurred: %s", e)
83
- raise RuntimeError(f"Unexpected Error: {e}")
84
-
85
- @auto_retry
86
- def generate_parameters_for_sound_effect(
87
- self, text: str, generated_audio_file: str = None
88
- ) -> dict:
89
- llm_output = self.generate_text_for_sound_effect(text)
90
- if generated_audio_file is not None:
91
- llm_output["duration_seconds"] = get_audio_duration(generated_audio_file)
92
- logger.info(
93
- "Added duration_seconds to output based on generated audio file: %s",
94
- generated_audio_file,
95
- )
96
- return llm_output
97
-
98
- @auto_retry
99
- def add_emotion_to_text(self, text: str) -> dict:
100
- completion = self.client.chat.completions.create(
101
- model=self.model_type,
102
- messages=[
103
- {"role": "system", "content": self.text_modification_prompt},
104
- {"role": "user", "content": text},
105
- ],
106
- response_format={"type": "json_object"},
107
- )
108
- chatgpt_output = completion.choices[0].message.content
109
- try:
110
- output_dict = json.loads(chatgpt_output)
111
- logger.info(
112
- "Successfully modified text with emotional cues: %s", output_dict
113
- )
114
- return output_dict
115
- except json.JSONDecodeError as e:
116
- logger.error("Error in parsing the modified text: %s", e)
117
- raise f"error, output_text: {chatgpt_output}"
118
 
119
 
120
  class EffectGeneratorAsync(AbstractEffectGenerator):
@@ -166,8 +173,8 @@ class EffectGeneratorAsync(AbstractEffectGenerator):
166
 
167
  @auto_retry
168
  async def generate_parameters_for_sound_effect(
169
- self, text: str, generated_audio_file: str = None
170
- ) -> dict:
171
  llm_output = await self.generate_text_for_sound_effect(text)
172
  if generated_audio_file is not None:
173
  llm_output["duration_seconds"] = get_audio_duration(generated_audio_file)
@@ -175,10 +182,10 @@ class EffectGeneratorAsync(AbstractEffectGenerator):
175
  "Added duration_seconds to output based on generated audio file: %s",
176
  generated_audio_file,
177
  )
178
- return llm_output
179
 
180
  @auto_retry
181
- async def add_emotion_to_text(self, text: str) -> dict:
182
  completion = await self.client.chat.completions.create(
183
  model=self.model_type,
184
  messages=[
@@ -193,7 +200,9 @@ class EffectGeneratorAsync(AbstractEffectGenerator):
193
  logger.info(
194
  "Successfully modified text with emotional cues: %s", output_dict
195
  )
196
- return output_dict
 
 
197
  except json.JSONDecodeError as e:
198
  logger.error("Error in parsing the modified text: %s", e)
199
  raise f"error, output_text: {chatgpt_output}"
 
1
  import json
2
+ import typing as t
3
  from abc import ABC, abstractmethod
4
 
5
  import openai
6
+ from pydantic import BaseModel
7
  from requests import HTTPError
8
 
9
  from src.config import OPENAI_API_KEY, logger
 
18
  from .utils import get_audio_duration
19
 
20
 
21
+ class TextPreparationForTTSTaskOutput(BaseModel):
22
+ task: str
23
+ output: t.Any
24
+
25
+
26
  class AbstractEffectGenerator(ABC):
27
  @abstractmethod
28
+ async def generate_text_for_sound_effect(self, text) -> dict:
29
  pass
30
 
31
  @abstractmethod
32
+ async def generate_parameters_for_sound_effect(
33
+ self, text: str, generated_audio_file: str | None
34
+ ) -> TextPreparationForTTSTaskOutput:
35
  pass
36
 
37
  @abstractmethod
38
+ async def add_emotion_to_text(self, text: str) -> TextPreparationForTTSTaskOutput:
39
  pass
40
 
41
 
42
+ # class EffectGenerator(AbstractEffectGenerator):
43
+ # def __init__(self, predict_duration: bool = True, model_type: str = "gpt-4o"):
44
+ # self.client = openai.OpenAI(api_key=OPENAI_API_KEY)
45
+ # self.sound_effect_prompt = (
46
+ # SOUND_EFFECT_GENERATION
47
+ # if predict_duration
48
+ # else SOUND_EFFECT_GENERATION_WITHOUT_DURATION_PREDICTION
49
+ # )
50
+ # self.text_modification_prompt = TEXT_MODIFICATION_WITH_SSML
51
+ # self.model_type = model_type
52
+ # logger.info(
53
+ # f"EffectGenerator initialized with model_type: {model_type}, predict_duration: {predict_duration}"
54
+ # )
55
+
56
+ # @auto_retry
57
+ # def generate_text_for_sound_effect(self, text: str) -> dict:
58
+ # """Generate sound effect description and parameters based on input text."""
59
+ # try:
60
+ # completion = self.client.chat.completions.create(
61
+ # model=self.model_type,
62
+ # messages=[
63
+ # {"role": "system", "content": self.sound_effect_prompt},
64
+ # {"role": "user", "content": text},
65
+ # ],
66
+ # response_format={"type": "json_object"},
67
+ # )
68
+ # # Extracting the output
69
+ # chatgpt_output = completion.choices[0].message.content
70
+
71
+ # # Parse and return JSON response
72
+ # output_dict = json.loads(chatgpt_output)
73
+ # logger.info(
74
+ # "Successfully generated sound effect description: %s", output_dict
75
+ # )
76
+ # return output_dict
77
+
78
+ # except json.JSONDecodeError as e:
79
+ # logger.error("Failed to parse the output text as JSON: %s", e)
80
+ # raise RuntimeError(
81
+ # f"Error: Failed to parse the output text as JSON.\nOutput: {chatgpt_output}"
82
+ # )
83
+
84
+ # except HTTPError as e:
85
+ # logger.error("HTTP error occurred: %s", e)
86
+ # raise RuntimeError(f"HTTP Error: {e}")
87
+
88
+ # except Exception as e:
89
+ # logger.error("Unexpected error occurred: %s", e)
90
+ # raise RuntimeError(f"Unexpected Error: {e}")
91
+
92
+ # @auto_retry
93
+ # def generate_parameters_for_sound_effect(
94
+ # self, text: str, generated_audio_file: str = None
95
+ # ) -> dict:
96
+ # llm_output = self.generate_text_for_sound_effect(text)
97
+ # if generated_audio_file is not None:
98
+ # llm_output["duration_seconds"] = get_audio_duration(generated_audio_file)
99
+ # logger.info(
100
+ # "Added duration_seconds to output based on generated audio file: %s",
101
+ # generated_audio_file,
102
+ # )
103
+ # return llm_output
104
+
105
+ # @auto_retry
106
+ # def add_emotion_to_text(self, text: str) -> dict:
107
+ # completion = self.client.chat.completions.create(
108
+ # model=self.model_type,
109
+ # messages=[
110
+ # {"role": "system", "content": self.text_modification_prompt},
111
+ # {"role": "user", "content": text},
112
+ # ],
113
+ # response_format={"type": "json_object"},
114
+ # )
115
+ # chatgpt_output = completion.choices[0].message.content
116
+ # try:
117
+ # output_dict = json.loads(chatgpt_output)
118
+ # logger.info(
119
+ # "Successfully modified text with emotional cues: %s", output_dict
120
+ # )
121
+ # return output_dict
122
+ # except json.JSONDecodeError as e:
123
+ # logger.error("Error in parsing the modified text: %s", e)
124
+ # raise f"error, output_text: {chatgpt_output}"
125
 
126
 
127
  class EffectGeneratorAsync(AbstractEffectGenerator):
 
173
 
174
  @auto_retry
175
  async def generate_parameters_for_sound_effect(
176
+ self, text: str, generated_audio_file: str | None = None
177
+ ) -> TextPreparationForTTSTaskOutput:
178
  llm_output = await self.generate_text_for_sound_effect(text)
179
  if generated_audio_file is not None:
180
  llm_output["duration_seconds"] = get_audio_duration(generated_audio_file)
 
182
  "Added duration_seconds to output based on generated audio file: %s",
183
  generated_audio_file,
184
  )
185
+ return TextPreparationForTTSTaskOutput(task="add_effects", output=llm_output)
186
 
187
  @auto_retry
188
+ async def add_emotion_to_text(self, text: str) -> TextPreparationForTTSTaskOutput:
189
  completion = await self.client.chat.completions.create(
190
  model=self.model_type,
191
  messages=[
 
200
  logger.info(
201
  "Successfully modified text with emotional cues: %s", output_dict
202
  )
203
+ return TextPreparationForTTSTaskOutput(
204
+ task="add_emotion", output=output_dict
205
+ )
206
  except json.JSONDecodeError as e:
207
  logger.error("Error in parsing the modified text: %s", e)
208
  raise f"error, output_text: {chatgpt_output}"
src/select_voice_chain.py CHANGED
@@ -13,6 +13,7 @@ from pydantic import BaseModel
 from src.config import logger
 from src.prompts import CharacterVoicePropertiesPrompt
 from src.utils import GPTModels, get_chat_llm
+from src.config import VOICES_CSV_FP
 
 
 class Property(StrEnum):
@@ -55,13 +56,21 @@ class VoiceSelector:
         Property.age_group: {"young", "middle_aged", "old"},
     }
 
-    def __init__(self, csv_table_fp: str):
-        self.df = self.read_data_table(csv_table_fp=csv_table_fp)
+    def __init__(self):
+        self.df = self.read_data_table(csv_table_fp=VOICES_CSV_FP)
 
     def read_data_table(self, csv_table_fp: str):
         logger.info(f'reading voice data from: "{csv_table_fp}"')
         df = pd.read_csv(csv_table_fp)
+        logger.info(f"{df.shape=}")
         df["age"] = df["age"].str.replace(" ", "_").str.replace("-", "_")
+
+        if "manual_quality_review" in df.columns:
+            logger.info('filtering df by "manual_quality_review" column')
+            ix_to_drop = df[df["manual_quality_review"].isin(["very bad"])].index
+            df.drop(index=ix_to_drop, inplace=True)
+            logger.info(f"df.shape after filtering voices: {df.shape}")
+
         return df
 
     def get_available_properties_str(self, prop: Property):
src/tts.py CHANGED
@@ -7,6 +7,7 @@ from elevenlabs import VoiceSettings
 load_dotenv()
 
 from src.config import logger, ELEVENLABS_API_KEY
+from src.utils import auto_retry
 
 ELEVEN_CLIENT = ElevenLabs(api_key=ELEVENLABS_API_KEY)
 
@@ -45,6 +46,14 @@ async def tts_astream(
         yield chunk
 
 
+@auto_retry
+async def tts_astream_consumed(
+    voice_id: str, text: str, params: dict | None = None
+) -> list[bytes]:
+    aiterator = tts_astream(voice_id=voice_id, text=text, params=params)
+    return [x async for x in aiterator]
+
+
 async def sound_generation_astream(
     sound_generation_data: dict,
 ) -> t.AsyncIterator[bytes]:
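A likely motivation for introducing `tts_astream_consumed`: a retry decorator wrapped around an async generator only retries creating the generator object, not the streaming itself, so the stream is drained inside a plain coroutine and the whole call is retried. A reduced sketch with a dummy stream; `flaky_stream` is illustrative, not part of this repository:

```python
import asyncio
import typing as t

from tenacity import retry, stop_after_attempt, wait_random_exponential

attempts = {"n": 0}


async def flaky_stream() -> t.AsyncIterator[bytes]:
    """Dummy audio stream that drops the connection on the first attempt."""
    attempts["n"] += 1
    yield b"chunk-1"
    if attempts["n"] == 1:
        raise ConnectionError("stream dropped mid-way")
    yield b"chunk-2"


@retry(wait=wait_random_exponential(min=2, max=6), stop=stop_after_attempt(10))
async def stream_consumed() -> list[bytes]:
    # draining the stream inside the retried coroutine means a mid-stream
    # failure restarts the whole request instead of leaving a half-read iterator
    return [chunk async for chunk in flaky_stream()]


print(asyncio.run(stream_consumed()))  # [b'chunk-1', b'chunk-2'] after one retry
```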
src/utils.py CHANGED
@@ -28,7 +28,7 @@ async def consume_aiter(aiterator):
 
 def auto_retry(f):
     decorator = retry(
-        wait=wait_random_exponential(min=1, max=5),
-        stop=stop_after_attempt(6),
+        wait=wait_random_exponential(min=2, max=6),
+        stop=stop_after_attempt(10),
     )
     return decorator(f)
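For orientation, the updated `auto_retry` settings mean: sleep a random, exponentially growing delay capped at 6 seconds between attempts, and stop after 10 attempts in total. A small check of the decorator as defined above, applied to a deliberately flaky function (`flaky` is illustrative):

```python
from tenacity import retry, stop_after_attempt, wait_random_exponential


def auto_retry(f):
    decorator = retry(
        wait=wait_random_exponential(min=2, max=6),
        stop=stop_after_attempt(10),
    )
    return decorator(f)


counter = {"calls": 0}


@auto_retry
def flaky() -> str:
    counter["calls"] += 1
    if counter["calls"] < 3:
        raise ConnectionError("transient failure")
    return "ok"


print(flaky(), counter["calls"])  # "ok" after two failed attempts, i.e. 3 calls
```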