Spaces:
Running
Running
navalnica
committed on
Commit
·
95849c2
1
Parent(s):
8797a8a
stability: use semaphore for tts text LLM preprocessing; retry tts_astream;
Browse files- .gitignore +1 -0
- README.md +22 -8
- data/11labs_available_tts_voices.reviewed.csv +35 -0
- data/samples_to_split.py +142 -0
- src/audio_generators.py +53 -27
- src/builder.py +2 -4
- src/config.py +4 -0
- src/emotions/generation.py +102 -93
- src/select_voice_chain.py +11 -2
- src/tts.py +9 -0
- src/utils.py +2 -2
.gitignore
CHANGED
@@ -5,5 +5,6 @@ venv
|
|
5 |
.DS_Store
|
6 |
|
7 |
data/books
|
|
|
8 |
|
9 |
.env
|
|
|
5 |
.DS_Store
|
6 |
|
7 |
data/books
|
8 |
+
data/audiobooks
|
9 |
|
10 |
.env
|
README.md
CHANGED
@@ -11,7 +11,9 @@ python_version: 3.11
|
|
11 |
---
|
12 |
|
13 |
### Action items
|
14 |
-
|
|
|
|
|
15 |
- intonations
|
16 |
- add context
|
17 |
- audio effects
|
@@ -19,14 +21,26 @@ python_version: 3.11
|
|
19 |
- filter, apply only for long phrases
|
20 |
- only for narrator?
|
21 |
- checkbox! make effects great again (no) optional
|
22 |
-
-
|
23 |
-
-
|
24 |
-
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
- optimizations
|
|
|
26 |
- combine sequential phrases of same character in single phrase
|
27 |
- support large texts. use batching. problem: how to ensure same characters?
|
28 |
-
- can detect characters in first prompt, then split text in each batch into character phrases
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
|
|
|
11 |
---
|
12 |
|
13 |
### Action items
|
14 |
+
|
15 |
+
- voices
|
16 |
+
- filter to use only best voices
|
17 |
- intonations
|
18 |
- add context
|
19 |
- audio effects
|
|
|
21 |
- filter, apply only for long phrases
|
22 |
- only for narrator?
|
23 |
- checkbox! make effects great again (no) optional
|
24 |
+
- stability
|
25 |
+
- add limit on input text size (5000 chars)
|
26 |
+
- improve UI
|
27 |
+
- add error box
|
28 |
+
- add samples
|
29 |
+
- show character parts
|
30 |
+
- remove file upload pane
|
31 |
+
- labels on how long to wait
|
32 |
+
- labels describing components
|
33 |
+
- header and description
|
34 |
+
- prepare slides / story
|
35 |
+
- testing
|
36 |
+
- eval current execution time
|
37 |
+
- test on different text inputs
|
38 |
- optimizations
|
39 |
+
- generate audio effects asynchronously
|
40 |
- combine sequential phrases of same character in single phrase
|
41 |
- support large texts. use batching. problem: how to ensure same characters?
|
42 |
+
- can detect characters in first prompt, then split text in each batch into character phrases
|
43 |
+
- probably split large phrases into smaller ones
|
44 |
+
- identify unknown characters
|
45 |
+
- use LLM to recognize characters for a given text and provide descriptions detailed enough to select appropriate voice
|
46 |
|
data/11labs_available_tts_voices.reviewed.csv
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
voice_id,name,preview_url,manual_quality_review,owner_id,permission_on_resource,is_legacy,is_mixed,accent,description,age,gender,category,language,descriptive
|
2 |
+
8opUN7sGOKbyojnjvNdl,Angela,https://storage.googleapis.com/eleven-public-prod/KI49vYhSgygxHVkjNTjMnJEaS6H2/voices/V8OwijRGPjSNuaZo64vg/6923fbb2-a713-4d42-a590-b8999b74f644.mp3,ok,,admin,FALSE,FALSE,american,,middle_aged,female,conversational,,confident
|
3 |
+
yu4eXTP5aod8KAQzTI3T,"Claudia - Credible, Competent & Authentic",https://storage.googleapis.com/eleven-public-prod/database/user/Db6dCWW3pFVb0DPq7Q5s86VmuNQ2/voices/yu4eXTP5aod8KAQzTI3T/8zgX5ljCJ8kmBL0Q8rx7.mp3,ok,,admin,FALSE,FALSE,american,,middle_aged,female,conversational,en,confident
|
4 |
+
wL9XWq1lRS2ZxgYArGOm,Freya - Concise and Wellspoken,https://storage.googleapis.com/eleven-public-prod/ofJ3ST4rvZcJUjpc7t5B7gqcCz93/voices/eWgG9MAbD9umBchcWP1B/cebbd3e7-2460-43d7-8ae3-d20160bc3e44.mp3,medium,,admin,FALSE,FALSE,american,,middle_aged,female,conversational,,formal
|
5 |
+
GUM0hIboeFNpFw8Le7kY,Amber,https://storage.googleapis.com/eleven-public-prod/Eq8tyIYOx5PoyXO0aMbKV4xG1FW2/voices/XX9E8l5jsdMf8kszvsyX/b8a2aa2c-e3dc-42d0-b943-a2eb5d760315.mp3,bad,,admin,FALSE,FALSE,american,,middle_aged,female,conversational,,calm
|
6 |
+
MS8BX0O8omm5Ie6mL8v2,Kay - 65 years old,https://storage.googleapis.com/eleven-public-prod/8UfhnmuNoPSOxBcoJqW3NsegvUG2/voices/r8SplNeU9vfxsNrJFstn/823adabf-a99a-4537-ac95-90e2ee315d2d.mp3,ok,,admin,FALSE,FALSE,british,,old,female,conversational,,calm
|
7 |
+
NNYB2AxILPYTs53mGYij,Jenn,https://storage.googleapis.com/eleven-public-prod/63YWf9FcbyS5PpTayiUUxnQ95Cj2/voices/QxTS99N2QuIZNwTkGB5M/518786bf-ee01-400e-a690-c75193fee06a.mp3,bad,,admin,FALSE,FALSE,american,,old,female,conversational,,serious
|
8 |
+
RigKMMlryPP9jHT50Jzl,Sybil,https://storage.googleapis.com/eleven-public-prod/LPSMwp0QPIPEu76c5UE6a2Tg6kU2/voices/bRp2SymzfuLvibautVoX/eb9ad599-df42-4447-8d85-b19ac1333018.mp3,bad,,admin,FALSE,FALSE,australian,,old,female,conversational,,casual
|
9 |
+
v921MVi0ixhdc1CW2WxF,Aspexia - Grand & Clear,https://storage.googleapis.com/eleven-public-prod/lA5gT7FFPLVt66XJyVjpVR1ljPA2/voices/ZV3F48j5cMfp5ZlnpCs4/eb367cb4-5e11-4cbd-8fbe-c417bf21ffd3.mp3,ok,,admin,FALSE,FALSE,american,,old,female,conversational,,casual
|
10 |
+
yxHHj4hKJKKTyvUpnbds,Heather - Mature British lady,https://storage.googleapis.com/eleven-public-prod/28U448fBuufmkz5lMBQX6JLm8sw2/voices/f3SgjPzocHWjSn3VXeKy/c9a06d6e-50fa-4ce3-9e7f-f6fd8c252898.mp3,ok,,admin,FALSE,FALSE,british,,old,female,conversational,,mature
|
11 |
+
cgSgspJ2msm6clMCkdW9,Jessica,https://storage.googleapis.com/eleven-public-prod/premade/voices/cgSgspJ2msm6clMCkdW9/56a97bf8-b69b-448f-846c-c3a11683d45a.mp3,ok,,,FALSE,FALSE,american,expressive,young,female,conversational,,
|
12 |
+
1btZhL2wthuOhUqvI0bB,Emily - pleasant teen voice,https://storage.googleapis.com/eleven-public-prod/opnveFdsOUSqSDvVg1e9RtN4lWG2/voices/tZ7CAYevCBDjnoUrMFli/430cc09c-257d-41cd-acdd-5177e05a193a.mp3,ok,,admin,FALSE,FALSE,british,,young,female,conversational,,pleasant
|
13 |
+
K5DRk4s8l1HFKsggS25u,Adrianna,https://storage.googleapis.com/eleven-public-prod/63YWf9FcbyS5PpTayiUUxnQ95Cj2/voices/ZWxdaecYtNJxT2ist5K7/201af84a-5d00-4c9f-999b-625bb8dab137.mp3,ok,,admin,FALSE,FALSE,australian,,young,female,conversational,,pleasant
|
14 |
+
PoHUWWWMHFrA8z7Q88pu,Miranda,https://storage.googleapis.com/eleven-public-prod/ZXqeDajThsaYR96DSywtY94M8Dy1/voices/PoHUWWWMHFrA8z7Q88pu/403a094a-c4e4-431f-b96a-7c63ae76508d.mp3,ok,,admin,FALSE,FALSE,american,,young,female,conversational,en,cute
|
15 |
+
Qo4wB0XcrmHFXFyP38mc,Sasha - Soothing and Chill,https://storage.googleapis.com/eleven-public-prod/u9529nqLipaEPA53CKpxKF7nqLH2/voices/mnp8rIZc6Kiem1xaJDaM/95b33e4a-e41b-4fda-9eb8-cab3039bce60.mp3,ok,,admin,FALSE,FALSE,american,,young,female,conversational,,chill
|
16 |
+
U0gWepDCHKmmexbBxeSX,Alicia - Black Female,https://storage.googleapis.com/eleven-public-prod/laura1cGHOTJh1Ah99tjAoTLy7z1/voices/NBcGlQxeT5lFe7hgzwTR/f69f1f70-26a0-403c-915d-1992b6d8cfd0.mp3,ok,,admin,FALSE,FALSE,american,,young,female,conversational,,crisp
|
17 |
+
rfkTsdZrVWEVhDycUYn9,Shelby,https://storage.googleapis.com/eleven-public-prod/database/user/amWos2WP7hRs0yPBtA0OwjuzHAH2/voices/rfkTsdZrVWEVhDycUYn9/iiFLm9bEQ83kiP0bExhG.mp3,ok,,admin,FALSE,FALSE,british,,young,female,conversational,en,pleasant
|
18 |
+
teAOBFSeynXfbyNgq6Ec,Ally - Curious and Chill,https://storage.googleapis.com/eleven-public-prod/ofJ3ST4rvZcJUjpc7t5B7gqcCz93/voices/mW6Z7SZeRqOvuzCTgxyk/2132b3d1-fdcf-4036-85c9-bdd003941130.mp3,bad,,admin,FALSE,FALSE,american,,young,female,conversational,,chill
|
19 |
+
IKne3meq5aSn9XLyUdCD,Charlie,https://storage.googleapis.com/eleven-public-prod/premade/voices/IKne3meq5aSn9XLyUdCD/102de6f2-22ed-43e0-a1f1-111fa75c5481.mp3,ok,,,FALSE,FALSE,australian,natural,middle_aged,male,conversational,,
|
20 |
+
cjVigY5qzO86Huf0OWal,Eric,https://storage.googleapis.com/eleven-public-prod/premade/voices/cjVigY5qzO86Huf0OWal/d098fda0-6456-4030-b3d8-63aa048c9070.mp3,medium,,,FALSE,FALSE,american,friendly,middle_aged,male,conversational,,
|
21 |
+
BFUk567oZITYKwOqegEq,Riley - loud and intense,https://storage.googleapis.com/eleven-public-prod/UwDtqCF44YaL77wxb8DVQlHT5Gp1/voices/60G0VdAP3WBQQbE6tSkT/ecc00def-2543-4b50-b93d-5d4b6c7dca33.mp3,very bad,,admin,FALSE,FALSE,american,,middle_aged,male,conversational,,intense
|
22 |
+
EkuRA6XL9UbflTWEtNbQ,Middle age Southern Male,https://storage.googleapis.com/eleven-public-prod/0gh9bWjaVmNOvQJVcRddxeYIS2z1/voices/t5Oo3tZSuEZt6BD2VGV4/5c0177c5-46bd-414c-abfd-6cd6d5677f08.mp3,medium,,admin,FALSE,FALSE,american,,middle_aged,male,conversational,,casual
|
23 |
+
MP7UPhn7eVWqCGJGIh6Q,Aaron Patrick - Fun-Upbeat,https://storage.googleapis.com/eleven-public-prod/database/user/ktIm5hvnGlc2TVlwOiZmbmw9kHy2/voices/MP7UPhn7eVWqCGJGIh6Q/NFiMZncqQJ0IFTzFGbwQ.mp3,ok,,admin,FALSE,FALSE,american,,middle_aged,male,conversational,en,upbeat
|
24 |
+
RPEIZnKMqlQiZyZd1Dae,Christopher - friendly guy next door,https://storage.googleapis.com/eleven-public-prod/database/user/HURZYaLa4shZEqiT75qd5tyEsSr1/voices/RPEIZnKMqlQiZyZd1Dae/FwLtZ4mCBHV0eLjbUM8Y.mp3,ok,,admin,FALSE,FALSE,american,,middle_aged,male,conversational,en,casual
|
25 |
+
Tx7VLgfksXHVnoY6jDGU,"Conversational Joe - A chatty casual voice, British RP male",https://storage.googleapis.com/eleven-public-prod/database/user/wf6Rmje05ZbqeHYfK82ThsPKouC2/voices/Tx7VLgfksXHVnoY6jDGU/ab4X4F9RcNSeTwBS8KS9.mp3,ok,,admin,FALSE,FALSE,british,,middle_aged,male,conversational,en,casual
|
26 |
+
WLKp2jV6nrS8aMkPPDRO,Paul - Australian Professional Presenter,https://storage.googleapis.com/eleven-public-prod/custom/voices/WLKp2jV6nrS8aMkPPDRO/eaTiwieru6XA3d8ocVFK.mp3,ok,,admin,FALSE,FALSE,australian,,middle_aged,male,conversational,en,professional
|
27 |
+
r5Al4oLBZaQ3SY9ncbVG,"Kass - Energetic, Casual, Engaging",https://storage.googleapis.com/eleven-public-prod/R51RaBhjNqbqLhP6cvEePomgTZX2/voices/r5Al4oLBZaQ3SY9ncbVG/18422b0c-f056-467c-997e-6a13854baea6.mp3,ok,,admin,FALSE,FALSE,american,,middle_aged,male,conversational,en,excited
|
28 |
+
NWQswDS4aKfvBwYabvGT,Călin OZ,https://storage.googleapis.com/eleven-public-prod/FJwaL91a8SZIMeJz4Nx1ea8F1N63/voices/fi2r0fw0bQUShZBKDyrI/ba51edb4-2776-410b-aee9-597a25ec7492.mp3,bad,,admin,FALSE,FALSE,american,,old,male,conversational,,wise
|
29 |
+
QT02s1mEPA02kfid1nTk,"Donald - American, 70 years old",https://storage.googleapis.com/eleven-public-prod/8UfhnmuNoPSOxBcoJqW3NsegvUG2/voices/MIer4gjk6bcEMGSRRkOs/06d81bc4-5cfa-4f46-b269-6cb655a45ad9.mp3,ok,,admin,FALSE,FALSE,american,,old,male,conversational,,intense
|
30 |
+
eAyDL3LKBhCxNF21PbuQ,Mike - teacher,https://storage.googleapis.com/eleven-public-prod/ZNTWuC7IYQQWqzzeKbpLS0FeplI3/voices/Au1xn1q4XOiitEswfIzo/56b03843-3723-4617-a486-bbedb0f84828.mp3,bad,,admin,FALSE,FALSE,american,,old,male,conversational,,pleasant
|
31 |
+
fQpAewJVtu0DoZkQ58bW,Old man with a soft voice,https://storage.googleapis.com/eleven-public-prod/uRcoDhI6DVf8aT39RswuDWK2C0P2/voices/xDBqezbIeYil8jkpc3c0/d2771c6e-0383-4050-bfa8-84a963baa098.mp3,bad,,admin,FALSE,FALSE,american,,old,male,conversational,,calm
|
32 |
+
z3CSZSXDkv7fvwqfOrZN,Bryan,https://storage.googleapis.com/eleven-public-prod/Y3yb5Fdg8aVDi9n3MLjYB1v0tMF2/voices/0HpbeTZr9DdE1r0eGVn1/4c4cd4a7-c2ea-45d3-b402-55fc875240e0.mp3,medium,,admin,FALSE,FALSE,american,,old,male,conversational,,confident
|
33 |
+
1RVpBInY9YUYMLSUQReV,Cody McAvoy,https://storage.googleapis.com/eleven-public-prod/EzfotYSdpJdL97IDcxmmjJAFhrB3/voices/1RVpBInY9YUYMLSUQReV/f4807088-b6ff-48fa-83ca-a060fe724d10.mp3,ok,,admin,FALSE,FALSE,american,,young,male,conversational,en,pleasant
|
34 |
+
IFtWI8YHaBY8I7EB217u,ADAM,https://storage.googleapis.com/eleven-public-prod/21EHOpCpDMOQhKNMRI94rNioulW2/voices/4KEs1SFpz8x6fuT7MB5B/e0992f5a-2528-4a2a-981c-0470ccb0e935.mp3,bad,,admin,FALSE,FALSE,american,,young,male,conversational,,pleasant
|
35 |
+
xSI29a9HAKdsWv3idXSN,Ali,https://storage.googleapis.com/eleven-public-prod/Je2OUP5SdgSZhG40J7umHz4bhXB3/voices/IlNTfQEoyv1KrV6Jbb0c/a4ba8772-1400-4910-adee-9e0dd65e2b74.mp3,ok,,admin,FALSE,FALSE,american,,young,male,conversational,,intense
|
data/samples_to_split.py
CHANGED
@@ -96,3 +96,145 @@ up on us and first thing you know—”
|
|
96 |
|
97 |
“Don’t believe everything you hear, Nick,” he advised me.
|
98 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
|
97 |
“Don’t believe everything you hear, Nick,” he advised me.
|
98 |
"""
|
99 |
+
|
100 |
+
WONDERFUL_CHRISTMAS_1 = """\
|
101 |
+
“Did the "Snow" go down? Who knows?” she asked, sobbing and crying, and
|
102 |
+
quite broken down by hearing the fact put into words.
|
103 |
+
|
104 |
+
“I’m sorry we said a word,” said Kate, crying with Mrs. Dobson; and,
|
105 |
+
dropping down upon a footstool, she laid her head in Mrs. Dobson’s lap.
|
106 |
+
|
107 |
+
“You will please forgive us, won’t you?” spoke up the boy in the bed.
|
108 |
+
|
109 |
+
“Forgive you! Yes, indeed. I’m glad you thought enough about my
|
110 |
+
poor—about Captain Dobson to talk of him at all, and to-day, too! Why, I
|
111 |
+
always go down the harbor to-day; it is just thirty-seven years ago
|
112 |
+
to-day since we were married and the ship went on its voyage.”
|
113 |
+
|
114 |
+
“And you will go this afternoon, won’t you?” questioned Kate.
|
115 |
+
|
116 |
+
“I mustn’t,” she said simply.
|
117 |
+
|
118 |
+
“Why not?”
|
119 |
+
|
120 |
+
“Why, I’ve got a little boy of my own to look after to-day, don’t you
|
121 |
+
see?” she said, suddenly smiling, with an uplifted face.
|
122 |
+
"""
|
123 |
+
|
124 |
+
# lots of narrator speech
|
125 |
+
WONDERFUL_CHRISTMAS_2 = """\
|
126 |
+
Harry Cornwall ran out from the tent to watch the flight of Frank
|
127 |
+
Hallock, who ran, frightened by the tiger, and as the boy disappeared
|
128 |
+
from sight over the hilltop, Harry determined to try and find time
|
129 |
+
enough that very evening to run up to the big house and tell all he knew
|
130 |
+
about the necklace that somebody had tucked into the pocket of the
|
131 |
+
waistcoat that fell to Jack Flibbit after the great fire. Harry could
|
132 |
+
not persuade himself that two little girls in the land had put two
|
133 |
+
necklaces, with the same mark, into pockets to go “Out West.” Harry’s
|
134 |
+
name was called in a loud tone, and he ran to obey the call at about the
|
135 |
+
same moment that Frank Hallock reached the cornfield and picked up his
|
136 |
+
hoe.
|
137 |
+
|
138 |
+
Frank counted the hills and the rows, and scarcely looked up until the
|
139 |
+
sound of the one o’clock train, on the New Haven Railroad, passing
|
140 |
+
through the town, told to him how short the hour had been.
|
141 |
+
|
142 |
+
Then Neptune came home. Frank knew that Kate was on the carriage-road
|
143 |
+
that ran past the field, but he would not look up, not even when he
|
144 |
+
heard her cheery call to him; so Neptune and Kate went on their way, and
|
145 |
+
presently the welcome sound of the dinner-horn was heard.
|
146 |
+
|
147 |
+
Frank did not throw down his hoe, but fell to work harder than ever. In
|
148 |
+
five minutes’ time the horn was sounded again, and on looking up, Frank
|
149 |
+
beheld Kate standing on the veranda—she was waving her hat to attract
|
150 |
+
his attention. He was inclined to make a martyr of himself just then, so
|
151 |
+
he waved his straw hat in return, and immediately resolved to “hoe away
|
152 |
+
like a major.”
|
153 |
+
|
154 |
+
In fifteen minutes more Kate was making her way for the second time that
|
155 |
+
day through Frank’s cornfield. As she drew near, she called out, “Frank!
|
156 |
+
Frank! why in the world don’t you come to dinner? There is a gentleman
|
157 |
+
at table who came to see papa on business, and I ran away after the
|
158 |
+
soup—I couldn’t eat my dinner one bit, without you.”
|
159 |
+
|
160 |
+
“You’ll _have_ to, I reckon,” returned Frank; “a poor fellow, like me,
|
161 |
+
who has to hoe corn all day, can’t stop to eat.”
|
162 |
+
|
163 |
+
“O, Frank Hallock! _for shame!_” cried Kate, putting down her indignant
|
164 |
+
foot without being able to make noise enough about it to disturb an
|
165 |
+
earthworm.
|
166 |
+
|
167 |
+
“It’s _true_,” responded Frank, pitching into the next hill with all his
|
168 |
+
might.
|
169 |
+
|
170 |
+
“It is _not_ true,” cried Kate; “and if just running off to _look_ at
|
171 |
+
the circus pass by makes you say such things, I am glad you can’t go to
|
172 |
+
see it.”\
|
173 |
+
"""
|
174 |
+
|
175 |
+
|
176 |
+
ARCH_WIKI_1 = """\
|
177 |
+
This document is an annotated index of popular articles and important information for improving and adding functionalities to the installed Arch system. Readers are assumed to have read and followed the Installation guide to obtain a basic Arch Linux installation. Having read and understood the concepts explained in #System administration and #Package management is required for following the other sections of this page and the other articles in the wiki.
|
178 |
+
System administration
|
179 |
+
|
180 |
+
This section deals with administrative tasks and system management. See Core utilities and Category:System administration for more.
|
181 |
+
Users and groups
|
182 |
+
|
183 |
+
A new installation leaves you with only the superuser account, better known as "root". Logging in as root for prolonged periods of time, possibly even exposing it via SSH on a server, is insecure. Instead, you should create and use unprivileged user account(s) for most tasks, only using the root account for system administration. See Users and groups#User management for details.
|
184 |
+
|
185 |
+
Users and groups are a mechanism for access control; administrators may fine-tune group membership and ownership to grant or deny users and services access to system resources. Read the Users and groups article for details and potential security risks.
|
186 |
+
Security
|
187 |
+
|
188 |
+
Read Security for recommendations and best practices on hardening the system.
|
189 |
+
|
190 |
+
For a list of applications to allow running commands or starting an interactive shell as another user (e.g. root), see List of applications/Security#Privilege elevation.
|
191 |
+
Service management
|
192 |
+
|
193 |
+
Arch Linux uses systemd as the init process, which is a system and service manager for Linux. For maintaining your Arch Linux installation, it is a good idea to learn the basics about it.
|
194 |
+
|
195 |
+
Interaction with systemd is done through the systemctl command. See systemd#Basic systemctl usage for more information.
|
196 |
+
|
197 |
+
A logging system is also provided, with the command journalctl. See journal for more information.
|
198 |
+
System maintenance
|
199 |
+
|
200 |
+
Arch is a rolling release system and has rapid package turnover, so users have to take some time to do system maintenance.
|
201 |
+
Package management
|
202 |
+
|
203 |
+
This section contains helpful information related to package management. See FAQ#Package management and Category:Package management for more.
|
204 |
+
Note: It is imperative to keep up to date with changes in Arch Linux that require manual intervention before upgrading your system. Subscribe to the arch-announce mailing list or the recent news RSS feed. Alternatively, check the front page Arch news every time before you update.
|
205 |
+
pacman
|
206 |
+
|
207 |
+
pacman is the Arch Linux package manager: it is highly encouraged to become familiar with it before reading any other articles.
|
208 |
+
|
209 |
+
To enable downloading packages in parallel, see pacman#Enabling parallel downloads.
|
210 |
+
|
211 |
+
For long term handling of cached packages, see pacman#Cleaning the package cache.
|
212 |
+
|
213 |
+
See pacman/Tips and tricks for suggestions on how to improve your interaction with pacman and package management in general.
|
214 |
+
Repositories
|
215 |
+
|
216 |
+
See the Official repositories article for details about the purpose of each officially maintained repository.
|
217 |
+
|
218 |
+
If you plan on using 32-bit applications, you will want to enable the multilib repository.
|
219 |
+
|
220 |
+
The Unofficial user repositories article lists several other unsupported repositories.
|
221 |
+
|
222 |
+
You may consider installing the pkgstats service.
|
223 |
+
Mirrors
|
224 |
+
|
225 |
+
Visit the Mirrors article for steps on taking full advantage of using the fastest and most up to date mirrors of the official repositories. As explained in the article, a particularly good advice is to routinely check the Mirror Status page for a list of mirrors that have been recently synced. This can be automated with Reflector.
|
226 |
+
Arch Build System
|
227 |
+
|
228 |
+
Ports is a system initially used by BSD distributions consisting of build scripts that reside in a directory tree on the local system. Simply put, each port contains a script within a directory intuitively named after the installable third-party application.
|
229 |
+
|
230 |
+
The Arch build system offers the same functionality by providing build scripts called PKGBUILDs, which are populated with information for a given piece of software: integrity hashes, project URL, version, license and build instructions. These PKGBUILDs are parsed by makepkg, the actual program that generates packages that are cleanly manageable by pacman.
|
231 |
+
|
232 |
+
Every package in the repositories along with those present in the AUR are subject to recompilation with makepkg.
|
233 |
+
Arch User Repository
|
234 |
+
|
235 |
+
While the Arch Build System allows the ability of building software available in the official repositories, the Arch User Repository (AUR) is the equivalent for user submitted packages. It is an unsupported repository of build scripts accessible through the web interface or through the Aurweb RPC interface.
|
236 |
+
Booting
|
237 |
+
|
238 |
+
This section contains information pertaining to the boot process. An overview of the Arch boot process can be found at Arch boot process. See Category:Boot process for more.
|
239 |
+
Hardware auto-recognition
|
240 |
+
"""
|
src/audio_generators.py
CHANGED
@@ -9,11 +9,14 @@ from langchain_community.callbacks import get_openai_callback
|
|
9 |
from pydub import AudioSegment
|
10 |
|
11 |
from src.lc_callbacks import LCMessageLoggerAsync
|
12 |
-
from src.tts import
|
13 |
from src.utils import auto_retry, consume_aiter
|
14 |
-
from src.emotions.generation import
|
|
|
|
|
|
|
15 |
from src.emotions.utils import add_overlay_for_audio
|
16 |
-
from src.config import ELEVENLABS_MAX_PARALLEL, logger
|
17 |
from src.text_split_chain import SplitTextOutput
|
18 |
|
19 |
|
@@ -28,8 +31,8 @@ class AudioGeneratorSimple:
|
|
28 |
|
29 |
async def tts_astream_with_semaphore(voice_id: str, text: str):
|
30 |
async with semaphore:
|
31 |
-
|
32 |
-
bytes_ = await consume_aiter(iter_)
|
33 |
return bytes_
|
34 |
|
35 |
tasks = []
|
@@ -77,12 +80,10 @@ class AudioGeneratorWithEffects:
|
|
77 |
)
|
78 |
logger.info(f"{generate_effects = }, {lines_for_sound_effect = }")
|
79 |
|
80 |
-
|
81 |
-
modified_texts, sound_emotion_results = await self._process_and_modify_text(
|
82 |
text_split, lines_for_sound_effect
|
83 |
)
|
84 |
|
85 |
-
# Step 2: Generate TTS audio for modified text
|
86 |
tts_results, self.temp_files = await self._generate_tts_audio(
|
87 |
text_split, modified_texts, character_to_voice
|
88 |
)
|
@@ -93,8 +94,12 @@ class AudioGeneratorWithEffects:
|
|
93 |
)
|
94 |
|
95 |
# Step 4: Merge audio files
|
96 |
-
normalized_audio_chunks = self._normalize_audio_chunks(
|
97 |
-
|
|
|
|
|
|
|
|
|
98 |
|
99 |
# Clean up temporary files
|
100 |
self._cleanup_temp_files(self.temp_files)
|
@@ -105,34 +110,51 @@ class AudioGeneratorWithEffects:
|
|
105 |
"""Select % of the lines randomly for sound effect generation."""
|
106 |
return random.sample(range(num_lines), k=int(fraction * num_lines))
|
107 |
|
108 |
-
async def
|
109 |
self, text_split: SplitTextOutput, lines_for_sound_effect: list[int]
|
110 |
) -> tuple[list[dict], list[dict]]:
|
111 |
-
|
112 |
-
|
113 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
114 |
|
115 |
for idx, character_phrase in enumerate(text_split.phrases):
|
116 |
character_text = character_phrase.text.strip().lower()
|
117 |
|
118 |
-
|
119 |
-
|
120 |
-
|
|
|
|
|
121 |
)
|
122 |
|
123 |
# If this line needs sound effects, generate parameters
|
124 |
if idx in lines_for_sound_effect:
|
125 |
-
|
126 |
-
|
127 |
-
|
|
|
128 |
)
|
129 |
)
|
130 |
|
131 |
-
|
132 |
-
|
133 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
|
135 |
-
return
|
136 |
|
137 |
async def _generate_tts_audio(
|
138 |
self,
|
@@ -146,8 +168,10 @@ class AudioGeneratorWithEffects:
|
|
146 |
|
147 |
async def tts_astream_with_semaphore(voice_id: str, text: str, params: dict):
|
148 |
async with self.semaphore:
|
149 |
-
|
150 |
-
|
|
|
|
|
151 |
return bytes_
|
152 |
|
153 |
for idx, (modified_text, character_phrase) in enumerate(
|
@@ -240,7 +264,9 @@ class AudioGeneratorWithEffects:
|
|
240 |
|
241 |
return normalized_files
|
242 |
|
243 |
-
def _merge_audio_files(
|
|
|
|
|
244 |
"""Helper function to merge multiple audio files into one."""
|
245 |
combined = AudioSegment.from_file(audio_filenames[0])
|
246 |
for filename in audio_filenames[1:]:
|
|
|
9 |
from pydub import AudioSegment
|
10 |
|
11 |
from src.lc_callbacks import LCMessageLoggerAsync
|
12 |
+
from src.tts import tts_astream_consumed, sound_generation_astream
|
13 |
from src.utils import auto_retry, consume_aiter
|
14 |
+
from src.emotions.generation import (
|
15 |
+
EffectGeneratorAsync,
|
16 |
+
TextPreparationForTTSTaskOutput,
|
17 |
+
)
|
18 |
from src.emotions.utils import add_overlay_for_audio
|
19 |
+
from src.config import ELEVENLABS_MAX_PARALLEL, logger, OPENAI_MAX_PARALLEL
|
20 |
from src.text_split_chain import SplitTextOutput
|
21 |
|
22 |
|
|
|
31 |
|
32 |
async def tts_astream_with_semaphore(voice_id: str, text: str):
|
33 |
async with semaphore:
|
34 |
+
bytes_ = await tts_astream_consumed(voice_id=voice_id, text=text)
|
35 |
+
# bytes_ = await consume_aiter(iter_)
|
36 |
return bytes_
|
37 |
|
38 |
tasks = []
|
|
|
80 |
)
|
81 |
logger.info(f"{generate_effects = }, {lines_for_sound_effect = }")
|
82 |
|
83 |
+
modified_texts, sound_emotion_results = await self._prepare_text_for_tts(
|
|
|
84 |
text_split, lines_for_sound_effect
|
85 |
)
|
86 |
|
|
|
87 |
tts_results, self.temp_files = await self._generate_tts_audio(
|
88 |
text_split, modified_texts, character_to_voice
|
89 |
)
|
|
|
94 |
)
|
95 |
|
96 |
# Step 4: Merge audio files
|
97 |
+
normalized_audio_chunks = self._normalize_audio_chunks(
|
98 |
+
audio_chunks, self.temp_files
|
99 |
+
)
|
100 |
+
final_output = self._merge_audio_files(
|
101 |
+
normalized_audio_chunks, save_path=out_path
|
102 |
+
)
|
103 |
|
104 |
# Clean up temporary files
|
105 |
self._cleanup_temp_files(self.temp_files)
|
|
|
110 |
"""Select % of the lines randomly for sound effect generation."""
|
111 |
return random.sample(range(num_lines), k=int(fraction * num_lines))
|
112 |
|
113 |
+
async def _prepare_text_for_tts(
|
114 |
self, text_split: SplitTextOutput, lines_for_sound_effect: list[int]
|
115 |
) -> tuple[list[dict], list[dict]]:
|
116 |
+
semaphore = asyncio.Semaphore(OPENAI_MAX_PARALLEL)
|
117 |
+
|
118 |
+
async def run_task_with_semaphore(func, **params):
|
119 |
+
async with semaphore:
|
120 |
+
outputs = await func(**params)
|
121 |
+
return outputs
|
122 |
+
|
123 |
+
task_emotion_code = "add_emotion"
|
124 |
+
task_effects_code = "add_effects"
|
125 |
+
|
126 |
+
tasks = []
|
127 |
|
128 |
for idx, character_phrase in enumerate(text_split.phrases):
|
129 |
character_text = character_phrase.text.strip().lower()
|
130 |
|
131 |
+
tasks.append(
|
132 |
+
run_task_with_semaphore(
|
133 |
+
func=self.effect_generator.add_emotion_to_text,
|
134 |
+
text=character_text,
|
135 |
+
)
|
136 |
)
|
137 |
|
138 |
# If this line needs sound effects, generate parameters
|
139 |
if idx in lines_for_sound_effect:
|
140 |
+
tasks.append(
|
141 |
+
run_task_with_semaphore(
|
142 |
+
func=self.effect_generator.generate_parameters_for_sound_effect,
|
143 |
+
text=character_text,
|
144 |
)
|
145 |
)
|
146 |
|
147 |
+
tasks_results: list[TextPreparationForTTSTaskOutput] = []
|
148 |
+
tasks_results = await asyncio.gather(*tasks)
|
149 |
+
|
150 |
+
emotion_tasks_results = [
|
151 |
+
x.output for x in tasks_results if x.task == task_emotion_code
|
152 |
+
]
|
153 |
+
effects_tasks_results = [
|
154 |
+
x.output for x in tasks_results if x.task == task_effects_code
|
155 |
+
]
|
156 |
|
157 |
+
return emotion_tasks_results, effects_tasks_results
|
158 |
|
159 |
async def _generate_tts_audio(
|
160 |
self,
|
|
|
168 |
|
169 |
async def tts_astream_with_semaphore(voice_id: str, text: str, params: dict):
|
170 |
async with self.semaphore:
|
171 |
+
bytes_ = await tts_astream_consumed(
|
172 |
+
voice_id=voice_id, text=text, params=params
|
173 |
+
)
|
174 |
+
# bytes_ = await consume_aiter(iter_)
|
175 |
return bytes_
|
176 |
|
177 |
for idx, (modified_text, character_phrase) in enumerate(
|
|
|
264 |
|
265 |
return normalized_files
|
266 |
|
267 |
+
def _merge_audio_files(
|
268 |
+
self, audio_filenames: list[str], save_path: Path | None = None
|
269 |
+
) -> Path:
|
270 |
"""Helper function to merge multiple audio files into one."""
|
271 |
combined = AudioSegment.from_file(audio_filenames[0])
|
272 |
for filename in audio_filenames[1:]:
|
src/builder.py
CHANGED
@@ -9,10 +9,8 @@ from src.utils import GPTModels
|
|
9 |
|
10 |
class AudiobookBuilder:
|
11 |
|
12 |
-
def __init__(self)
|
13 |
-
self.voice_selector = VoiceSelector(
|
14 |
-
csv_table_fp="data/11labs_available_tts_voices.csv"
|
15 |
-
)
|
16 |
self.audio_generator = AudioGeneratorWithEffects()
|
17 |
|
18 |
async def split_text(self, text: str) -> SplitTextOutput:
|
|
|
9 |
|
10 |
class AudiobookBuilder:
|
11 |
|
12 |
+
def __init__(self):
|
13 |
+
self.voice_selector = VoiceSelector()
|
|
|
|
|
14 |
self.audio_generator = AudioGeneratorWithEffects()
|
15 |
|
16 |
async def split_text(self, text: str) -> SplitTextOutput:
|
src/config.py
CHANGED
@@ -12,4 +12,8 @@ ELEVENLABS_API_KEY = os.environ["ELEVEN_LABS_API_KEY"]
|
|
12 |
|
13 |
FILE_SIZE_MAX = 0.5 # in mb
|
14 |
|
|
|
15 |
ELEVENLABS_MAX_PARALLEL = 15 # current limitation of available subscription
|
|
|
|
|
|
|
|
12 |
|
13 |
FILE_SIZE_MAX = 0.5 # in mb
|
14 |
|
15 |
+
OPENAI_MAX_PARALLEL = 8 # empirically set
|
16 |
ELEVENLABS_MAX_PARALLEL = 15 # current limitation of available subscription
|
17 |
+
|
18 |
+
# VOICES_CSV_FP = "data/11labs_available_tts_voices.csv"
|
19 |
+
VOICES_CSV_FP = "data/11labs_available_tts_voices.reviewed.csv"
|
src/emotions/generation.py
CHANGED
@@ -1,7 +1,9 @@
|
|
1 |
import json
|
|
|
2 |
from abc import ABC, abstractmethod
|
3 |
|
4 |
import openai
|
|
|
5 |
from requests import HTTPError
|
6 |
|
7 |
from src.config import OPENAI_API_KEY, logger
|
@@ -16,105 +18,110 @@ from .prompts import (
|
|
16 |
from .utils import get_audio_duration
|
17 |
|
18 |
|
|
|
|
|
|
|
|
|
|
|
19 |
class AbstractEffectGenerator(ABC):
|
20 |
@abstractmethod
|
21 |
-
def generate_text_for_sound_effect(self, text) -> dict:
|
22 |
pass
|
23 |
|
24 |
@abstractmethod
|
25 |
-
def generate_parameters_for_sound_effect(
|
26 |
-
self, text: str, generated_audio_file: str
|
27 |
-
) ->
|
28 |
pass
|
29 |
|
30 |
@abstractmethod
|
31 |
-
def add_emotion_to_text(self, text: str) ->
|
32 |
pass
|
33 |
|
34 |
|
35 |
-
class EffectGenerator(AbstractEffectGenerator):
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
|
119 |
|
120 |
class EffectGeneratorAsync(AbstractEffectGenerator):
|
@@ -166,8 +173,8 @@ class EffectGeneratorAsync(AbstractEffectGenerator):
|
|
166 |
|
167 |
@auto_retry
|
168 |
async def generate_parameters_for_sound_effect(
|
169 |
-
self, text: str, generated_audio_file: str = None
|
170 |
-
) ->
|
171 |
llm_output = await self.generate_text_for_sound_effect(text)
|
172 |
if generated_audio_file is not None:
|
173 |
llm_output["duration_seconds"] = get_audio_duration(generated_audio_file)
|
@@ -175,10 +182,10 @@ class EffectGeneratorAsync(AbstractEffectGenerator):
|
|
175 |
"Added duration_seconds to output based on generated audio file: %s",
|
176 |
generated_audio_file,
|
177 |
)
|
178 |
-
return llm_output
|
179 |
|
180 |
@auto_retry
|
181 |
-
async def add_emotion_to_text(self, text: str) ->
|
182 |
completion = await self.client.chat.completions.create(
|
183 |
model=self.model_type,
|
184 |
messages=[
|
@@ -193,7 +200,9 @@ class EffectGeneratorAsync(AbstractEffectGenerator):
|
|
193 |
logger.info(
|
194 |
"Successfully modified text with emotional cues: %s", output_dict
|
195 |
)
|
196 |
-
return
|
|
|
|
|
197 |
except json.JSONDecodeError as e:
|
198 |
logger.error("Error in parsing the modified text: %s", e)
|
199 |
raise f"error, output_text: {chatgpt_output}"
|
|
|
1 |
import json
|
2 |
+
import typing as t
|
3 |
from abc import ABC, abstractmethod
|
4 |
|
5 |
import openai
|
6 |
+
from pydantic import BaseModel
|
7 |
from requests import HTTPError
|
8 |
|
9 |
from src.config import OPENAI_API_KEY, logger
|
|
|
18 |
from .utils import get_audio_duration
|
19 |
|
20 |
|
21 |
+
class TextPreparationForTTSTaskOutput(BaseModel):
|
22 |
+
task: str
|
23 |
+
output: t.Any
|
24 |
+
|
25 |
+
|
26 |
class AbstractEffectGenerator(ABC):
|
27 |
@abstractmethod
|
28 |
+
async def generate_text_for_sound_effect(self, text) -> dict:
|
29 |
pass
|
30 |
|
31 |
@abstractmethod
|
32 |
+
async def generate_parameters_for_sound_effect(
|
33 |
+
self, text: str, generated_audio_file: str | None
|
34 |
+
) -> TextPreparationForTTSTaskOutput:
|
35 |
pass
|
36 |
|
37 |
@abstractmethod
|
38 |
+
async def add_emotion_to_text(self, text: str) -> TextPreparationForTTSTaskOutput:
|
39 |
pass
|
40 |
|
41 |
|
42 |
+
# class EffectGenerator(AbstractEffectGenerator):
|
43 |
+
# def __init__(self, predict_duration: bool = True, model_type: str = "gpt-4o"):
|
44 |
+
# self.client = openai.OpenAI(api_key=OPENAI_API_KEY)
|
45 |
+
# self.sound_effect_prompt = (
|
46 |
+
# SOUND_EFFECT_GENERATION
|
47 |
+
# if predict_duration
|
48 |
+
# else SOUND_EFFECT_GENERATION_WITHOUT_DURATION_PREDICTION
|
49 |
+
# )
|
50 |
+
# self.text_modification_prompt = TEXT_MODIFICATION_WITH_SSML
|
51 |
+
# self.model_type = model_type
|
52 |
+
# logger.info(
|
53 |
+
# f"EffectGenerator initialized with model_type: {model_type}, predict_duration: {predict_duration}"
|
54 |
+
# )
|
55 |
+
|
56 |
+
# @auto_retry
|
57 |
+
# def generate_text_for_sound_effect(self, text: str) -> dict:
|
58 |
+
# """Generate sound effect description and parameters based on input text."""
|
59 |
+
# try:
|
60 |
+
# completion = self.client.chat.completions.create(
|
61 |
+
# model=self.model_type,
|
62 |
+
# messages=[
|
63 |
+
# {"role": "system", "content": self.sound_effect_prompt},
|
64 |
+
# {"role": "user", "content": text},
|
65 |
+
# ],
|
66 |
+
# response_format={"type": "json_object"},
|
67 |
+
# )
|
68 |
+
# # Extracting the output
|
69 |
+
# chatgpt_output = completion.choices[0].message.content
|
70 |
+
|
71 |
+
# # Parse and return JSON response
|
72 |
+
# output_dict = json.loads(chatgpt_output)
|
73 |
+
# logger.info(
|
74 |
+
# "Successfully generated sound effect description: %s", output_dict
|
75 |
+
# )
|
76 |
+
# return output_dict
|
77 |
+
|
78 |
+
# except json.JSONDecodeError as e:
|
79 |
+
# logger.error("Failed to parse the output text as JSON: %s", e)
|
80 |
+
# raise RuntimeError(
|
81 |
+
# f"Error: Failed to parse the output text as JSON.\nOutput: {chatgpt_output}"
|
82 |
+
# )
|
83 |
+
|
84 |
+
# except HTTPError as e:
|
85 |
+
# logger.error("HTTP error occurred: %s", e)
|
86 |
+
# raise RuntimeError(f"HTTP Error: {e}")
|
87 |
+
|
88 |
+
# except Exception as e:
|
89 |
+
# logger.error("Unexpected error occurred: %s", e)
|
90 |
+
# raise RuntimeError(f"Unexpected Error: {e}")
|
91 |
+
|
92 |
+
# @auto_retry
|
93 |
+
# def generate_parameters_for_sound_effect(
|
94 |
+
# self, text: str, generated_audio_file: str = None
|
95 |
+
# ) -> dict:
|
96 |
+
# llm_output = self.generate_text_for_sound_effect(text)
|
97 |
+
# if generated_audio_file is not None:
|
98 |
+
# llm_output["duration_seconds"] = get_audio_duration(generated_audio_file)
|
99 |
+
# logger.info(
|
100 |
+
# "Added duration_seconds to output based on generated audio file: %s",
|
101 |
+
# generated_audio_file,
|
102 |
+
# )
|
103 |
+
# return llm_output
|
104 |
+
|
105 |
+
# @auto_retry
|
106 |
+
# def add_emotion_to_text(self, text: str) -> dict:
|
107 |
+
# completion = self.client.chat.completions.create(
|
108 |
+
# model=self.model_type,
|
109 |
+
# messages=[
|
110 |
+
# {"role": "system", "content": self.text_modification_prompt},
|
111 |
+
# {"role": "user", "content": text},
|
112 |
+
# ],
|
113 |
+
# response_format={"type": "json_object"},
|
114 |
+
# )
|
115 |
+
# chatgpt_output = completion.choices[0].message.content
|
116 |
+
# try:
|
117 |
+
# output_dict = json.loads(chatgpt_output)
|
118 |
+
# logger.info(
|
119 |
+
# "Successfully modified text with emotional cues: %s", output_dict
|
120 |
+
# )
|
121 |
+
# return output_dict
|
122 |
+
# except json.JSONDecodeError as e:
|
123 |
+
# logger.error("Error in parsing the modified text: %s", e)
|
124 |
+
# raise f"error, output_text: {chatgpt_output}"
|
125 |
|
126 |
|
127 |
class EffectGeneratorAsync(AbstractEffectGenerator):
|
|
|
173 |
|
174 |
@auto_retry
|
175 |
async def generate_parameters_for_sound_effect(
|
176 |
+
self, text: str, generated_audio_file: str | None = None
|
177 |
+
) -> TextPreparationForTTSTaskOutput:
|
178 |
llm_output = await self.generate_text_for_sound_effect(text)
|
179 |
if generated_audio_file is not None:
|
180 |
llm_output["duration_seconds"] = get_audio_duration(generated_audio_file)
|
|
|
182 |
"Added duration_seconds to output based on generated audio file: %s",
|
183 |
generated_audio_file,
|
184 |
)
|
185 |
+
return TextPreparationForTTSTaskOutput(task="add_effects", output=llm_output)
|
186 |
|
187 |
@auto_retry
|
188 |
+
async def add_emotion_to_text(self, text: str) -> TextPreparationForTTSTaskOutput:
|
189 |
completion = await self.client.chat.completions.create(
|
190 |
model=self.model_type,
|
191 |
messages=[
|
|
|
200 |
logger.info(
|
201 |
"Successfully modified text with emotional cues: %s", output_dict
|
202 |
)
|
203 |
+
return TextPreparationForTTSTaskOutput(
|
204 |
+
task="add_emotion", output=output_dict
|
205 |
+
)
|
206 |
except json.JSONDecodeError as e:
|
207 |
logger.error("Error in parsing the modified text: %s", e)
|
208 |
raise f"error, output_text: {chatgpt_output}"
|
src/select_voice_chain.py
CHANGED
@@ -13,6 +13,7 @@ from pydantic import BaseModel
|
|
13 |
from src.config import logger
|
14 |
from src.prompts import CharacterVoicePropertiesPrompt
|
15 |
from src.utils import GPTModels, get_chat_llm
|
|
|
16 |
|
17 |
|
18 |
class Property(StrEnum):
|
@@ -55,13 +56,21 @@ class VoiceSelector:
|
|
55 |
Property.age_group: {"young", "middle_aged", "old"},
|
56 |
}
|
57 |
|
58 |
-
def __init__(self
|
59 |
-
self.df = self.read_data_table(csv_table_fp=
|
60 |
|
61 |
def read_data_table(self, csv_table_fp: str):
|
62 |
logger.info(f'reading voice data from: "{csv_table_fp}"')
|
63 |
df = pd.read_csv(csv_table_fp)
|
|
|
64 |
df["age"] = df["age"].str.replace(" ", "_").str.replace("-", "_")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
return df
|
66 |
|
67 |
def get_available_properties_str(self, prop: Property):
|
|
|
13 |
from src.config import logger
|
14 |
from src.prompts import CharacterVoicePropertiesPrompt
|
15 |
from src.utils import GPTModels, get_chat_llm
|
16 |
+
from src.config import VOICES_CSV_FP
|
17 |
|
18 |
|
19 |
class Property(StrEnum):
|
|
|
56 |
Property.age_group: {"young", "middle_aged", "old"},
|
57 |
}
|
58 |
|
59 |
+
def __init__(self):
|
60 |
+
self.df = self.read_data_table(csv_table_fp=VOICES_CSV_FP)
|
61 |
|
62 |
def read_data_table(self, csv_table_fp: str):
|
63 |
logger.info(f'reading voice data from: "{csv_table_fp}"')
|
64 |
df = pd.read_csv(csv_table_fp)
|
65 |
+
logger.info(f"{df.shape=}")
|
66 |
df["age"] = df["age"].str.replace(" ", "_").str.replace("-", "_")
|
67 |
+
|
68 |
+
if "manual_quality_review" in df.columns:
|
69 |
+
logger.info('filtering df by "manual_quality_review" column')
|
70 |
+
ix_to_drop = df[df["manual_quality_review"].isin(["very bad"])].index
|
71 |
+
df.drop(index=ix_to_drop, inplace=True)
|
72 |
+
logger.info(f"df.shape after filtering voices: {df.shape}")
|
73 |
+
|
74 |
return df
|
75 |
|
76 |
def get_available_properties_str(self, prop: Property):
|
src/tts.py
CHANGED
@@ -7,6 +7,7 @@ from elevenlabs import VoiceSettings
|
|
7 |
load_dotenv()
|
8 |
|
9 |
from src.config import logger, ELEVENLABS_API_KEY
|
|
|
10 |
|
11 |
ELEVEN_CLIENT = ElevenLabs(api_key=ELEVENLABS_API_KEY)
|
12 |
|
@@ -45,6 +46,14 @@ async def tts_astream(
|
|
45 |
yield chunk
|
46 |
|
47 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
async def sound_generation_astream(
|
49 |
sound_generation_data: dict,
|
50 |
) -> t.AsyncIterator[bytes]:
|
|
|
7 |
load_dotenv()
|
8 |
|
9 |
from src.config import logger, ELEVENLABS_API_KEY
|
10 |
+
from src.utils import auto_retry
|
11 |
|
12 |
ELEVEN_CLIENT = ElevenLabs(api_key=ELEVENLABS_API_KEY)
|
13 |
|
|
|
46 |
yield chunk
|
47 |
|
48 |
|
49 |
+
@auto_retry
|
50 |
+
async def tts_astream_consumed(
|
51 |
+
voice_id: str, text: str, params: dict | None = None
|
52 |
+
) -> list[bytes]:
|
53 |
+
aiterator = tts_astream(voice_id=voice_id, text=text, params=params)
|
54 |
+
return [x async for x in aiterator]
|
55 |
+
|
56 |
+
|
57 |
async def sound_generation_astream(
|
58 |
sound_generation_data: dict,
|
59 |
) -> t.AsyncIterator[bytes]:
|
src/utils.py
CHANGED
@@ -28,7 +28,7 @@ async def consume_aiter(aiterator):
|
|
28 |
|
29 |
def auto_retry(f):
|
30 |
decorator = retry(
|
31 |
-
wait=wait_random_exponential(min=
|
32 |
-
stop=stop_after_attempt(
|
33 |
)
|
34 |
return decorator(f)
|
|
|
28 |
|
29 |
def auto_retry(f):
|
30 |
decorator = retry(
|
31 |
+
wait=wait_random_exponential(min=2, max=6),
|
32 |
+
stop=stop_after_attempt(10),
|
33 |
)
|
34 |
return decorator(f)
|