## Setup Notebook, Install


In [None]:
!git clone https://github.com/JonathanFly/bark.git
%cd bark
!pip install -r requirements-pip.txt
!pip install encodec rich-argparse
!pip install librosa pydub

## Run Once Per Notebook Restart (if files still exist)

In [None]:
import os
import time
from bark_infinity import config
import numpy as np

logger = config.logger
logger.setLevel("WARNING")

from bark_infinity import generation
from bark_infinity import api

import rich
from rich import print
from rich import pretty
from rich.pretty import pprint
from rich import inspect

from pydub import AudioSegment
import ipywidgets as widgets
from IPython.display import display, Audio
from io import BytesIO

# None of this is Bark code; it is just fiddling with Colab display helpers.
# The goal was to save the Colab notebook with its outputs, because float32 wavs are gigantic.
# Actually this doesn't work: the IPython widget converts the audio back to float32? Or I messed up.

def display_audio_int16_but(audio_arr_segments, file_name, sample_rate=generation.SAMPLE_RATE,  width='200px'):
    """Show a file-name label followed by an inline audio player.

    Args:
        audio_arr_segments: audio samples, or a list of sample arrays that is
            concatenated into one array before playback.
        file_name: name shown in the "Playing: ..." label.
        sample_rate: playback rate handed to the audio player.
        width: CSS width applied to the label widget.

    Despite the function name, no int16 conversion is performed: the scaling
    step was disabled, so the samples reach the player unchanged.
    """
    if isinstance(audio_arr_segments, list):
        samples = np.concatenate(audio_arr_segments)
    else:
        samples = audio_arr_segments

    caption = widgets.Label(value=f"Playing: {file_name}")
    caption.layout.width = width

    display(caption, Audio(samples, rate=sample_rate))
    

def on_button_click(button):
    """Button callback: load the wav file attached to the button and play it inline.

    Args:
        button: the clicked widget. Its ``wav_path`` attribute holds the path
            of the wav file to load.
    """
    # librosa is installed by the setup cell but never imported at notebook
    # level; without this import every click raises NameError.
    import librosa

    # sr=None keeps the file's native sample rate instead of resampling.
    audio_data, sample_rate = librosa.load(button.wav_path, sr=None)
    file_name = os.path.basename(button.wav_path)
    display_audio_int16_but(audio_data, file_name, sample_rate)


def display_wav_files(directory):
    """Recursively list the .wav files under a directory with a play button each.

    Files directly inside ``directory`` come first, ordered by file name. Each
    subdirectory is then announced as ``<path>`` and walked the same way, in
    sorted path order.
    """
    entries = [os.path.join(directory, name) for name in os.listdir(directory)]

    wav_paths = sorted(
        (path for path in entries if os.path.isfile(path) and path.endswith('.wav')),
        key=os.path.basename,
    )
    child_dirs = sorted(path for path in entries if os.path.isdir(path))

    for wav_path in wav_paths:
        filename = os.path.basename(wav_path)
        print(f" {filename}")
        play_button = widgets.Button(description=f"Play {filename}")
        # Stash the path on the widget so the click callback knows what to load.
        play_button.wav_path = wav_path
        play_button.on_click(on_button_click)
        display(play_button)

    for child_dir in child_dirs:
        print(f"<{child_dir}>")
        display_wav_files(child_dir)



## Generate


### Choose Bark Models

In [None]:
# OFFLOAD_CPU: probably set this to True on a home system; a Colab GPU should
# have enough memory to hold all three models at once.
# NOTE(review): presumably this moves idle models off the GPU — confirm in bark_infinity.generation.
generation.OFFLOAD_CPU = True # On your home system set to True probably, but Colab GPU should have plenty of memory for all three models
# Optional: models are lazy-loaded if not preloaded here. The first run in a new Colab has to download them.
generation.preload_models() # Optional, will lazy load if not preloaded. First time run in New Colab has to download models

In [None]:
text = """
Hark! I, the phantom visage of Edward Teach, rise from the 
abyss, forever bound to the briny depths. With me, brave the tumultuous seas, claim treasures untold,
and send foes to their watery doom.
"""

In [53]:
import os
import time
from bark_infinity import config
import numpy as np

logger = config.logger
logger.setLevel("WARNING")

from bark_infinity import generation
from bark_infinity import api

import rich
from rich import print
from rich import pretty
from rich.pretty import pprint
from rich import inspect


from pydub import AudioSegment
import ipywidgets as widgets
from IPython.display import display, Audio
from io import BytesIO
from tqdm import tqdm


# kwargs holds whatever config.load_all_defaults() returns; the assignments
# below override individual entries before the generation calls further down.
# To control text splitting, set split_character_goal_length and split_character_max_length.
kwargs = {}

kwargs = config.load_all_defaults()
#kwargs['text_prompt'] = text
kwargs['hoarder_mode'] = True
kwargs["output_dir"] = 'bark_samples'
kwargs["history_prompt"] = None
# kwargs["single_starting_seed"] = None #
# If you set a seed, you may want to call generation.set_seed(-1) manually afterwards to disable the deterministic generation settings.
# I'm not cleaning up after this parameter at the moment, and I'm not sure about other side effects.
kwargs["stable_mode_interval"] = 1 # 0 for continuous, 2,3,4 for mixed
kwargs["split_character_goal_length"] = 145
kwargs["split_character_max_length"] = 190
# kwargs["output_iterations"] = 1
kwargs["add_silence_between_segments"] = 0.0 # See: https://github.com/suno-ai/bark/blob/main/notebooks/long_form_generation.ipynb but not great for songs or stable_mode_interval 0
kwargs["semantic_min_eos_p"] = 0.2 # 0.20 is the default; lower means more likely to stop


# Not sure yet what overall effect these have, but for example:
kwargs["semantic_top_k"] = None 
kwargs["semantic_top_p"] = None
kwargs["coarse_top_k"] = None
kwargs["coarse_top_p"] = None


In [None]:


from rich import prompt, print,inspect
pprint(kwargs)

In [54]:
kwargs['history_prompt'] = "custom_speakers/hark_woman.npz"
text = """With me, brave the tumultuous seas, claim treasures untold, and send foes to their watery 
doom"""
kwargs['text_prompt'] = text
kwargs['single_starting_seed'] = None
generation.set_seed(37)
logger.setLevel("DEBUG")

In [55]:
api.gradio_try_to_cancel = False

In [12]:
kwargs['output_dir'] = "long"
generation.set_seed(37)
full_generation_segments, audio_arr_segments, final_filename_will_be = api.generate_audio_long(**kwargs)

segment_text: With me, brave the tumultuous seas, claim treasures untold, and send foes to their watery doom


With me, brave the tumultuous seas, claim treasures untold, and send foes to their watery doom


before load all defaults


after load all defaults


100%|██████████| 100/100 [00:05<00:00, 17.65it/s]


100%|██████████| 22/22 [00:15<00:00,  1.44it/s]


100%|██████████| 2/2 [00:04<00:00,  2.20s/it]


Saved to long/With_me_brave_t-SPK-hark_woman_3.wav


In [14]:
Audio(audio_arr_segments, rate=generation.SAMPLE_RATE) 

In [11]:
inspect(full_generation_segments)

In [56]:
text = """With me, brave the tumultuous seas, claim treasures untold, and send foes to their watery 
doom"""
kwargs["semantic_min_eos_p"] = 0.2 
kwargs["text_prompt"] = text
generation.set_seed(37)
kwargs['output_full']  = True
kwargs['output_dir'] = "seed37"
kwargs["history_prompt"] = None
#kwargs['history_prompt'] = 'custom_speakers/en_fiery.npz'
barkifull, audio_arr_segments_barki = api.generate_audio_barki(text, **kwargs)

before load all defaults


after load all defaults


100%|██████████| 100/100 [00:05<00:00, 17.48it/s]


100%|██████████| 22/22 [00:15<00:00,  1.47it/s]


100%|██████████| 1/1 [00:04<00:00,  4.00s/it]


In [57]:
#inspect(barkifull)

for x in barkifull.keys():
    print(f"{x} {len(barkifull[x])}")

api.save_as_prompt("custom_speakers/seed37_barki.npz",barkifull)

In [59]:
Audio(audio_arr_segments_barki, rate=generation.SAMPLE_RATE)

In [None]:

def generate_audio_barki(
    text: str,
    **kwargs,
):
    """Generate audio from text by running the semantic, coarse and fine stages in turn.

    Every option other than ``text`` is read from ``kwargs`` after it has been
    passed through ``load_all_defaults``.

    NOTE(review): this cell references names that are not defined in the
    visible notebook (``load_all_defaults``, ``call_with_non_none_params``,
    ``generate_text_semantic``, ``generate_coarse``, ``generate_fine``,
    ``codec_decode``, ``write_one_segment``); it looks like a copy of
    ``api.generate_audio_barki`` pasted for reference — confirm.

    Args:
        text: text to be turned into audio
        history_prompt: history choice for audio cloning
        text_temp: generation temperature (1.0 more diverse, 0.0 more conservative);
            used for the semantic stage unless ``semantic_temp`` is set
        waveform_temp: generation temperature (1.0 more diverse, 0.0 more conservative);
            used for the coarse stage unless ``coarse_temp`` is set
        silent: disable progress bar
        output_full: return full generation to be used as a history prompt
        seed, semantic_seed, coarse_seed, fine_seed: when not None, each is
            passed to ``generation.set_seed`` before the corresponding stage
        hoarder_mode, total_segments: when ``hoarder_mode`` is truthy and
            ``total_segments`` is greater than 1, the segment is also handed
            to ``write_one_segment``

    Returns:
        ``(full_generation, audio_arr)`` when ``output_full`` is truthy,
        otherwise ``audio_arr`` alone (described by the original docstring as a
        numpy audio array at sample frequency 24khz).
        ``(None, None)`` when ``gradio_try_to_cancel`` is set at any of the
        checkpoints below — note this is a 2-tuple even if ``output_full`` is falsy.
    """
    logger.debug(locals())
    print("before load all defaults")
    kwargs = load_all_defaults(**kwargs)

    logger.debug(locals())
    print("after load all defaults")
    history_prompt = kwargs.get("history_prompt", None)
    text_temp = kwargs.get("text_temp", None)
    waveform_temp = kwargs.get("waveform_temp", None)
    silent = kwargs.get("silent", None)
    output_full = kwargs.get("output_full", None)

    # Module-level cancellation flags; this function reads the first and sets the second.
    global gradio_try_to_cancel
    global done_cancelling

    seed = kwargs.get("seed",None)
    if seed is not None:
        generation.set_seed(seed)

    ## TODO separate stage seeds

    ## Semantic Options
    semantic_temp = text_temp
    # Truthiness check: a semantic_temp of 0 or 0.0 falls back to text_temp.
    if kwargs.get("semantic_temp", None):
        semantic_temp = kwargs.get("semantic_temp")

    semantic_seed = kwargs.get("semantic_seed",None)
    if semantic_seed is not None:
        generation.set_seed(semantic_seed)


    # Cancellation checkpoint before the semantic stage.
    if gradio_try_to_cancel:
        done_cancelling = True
        return None, None
    semantic_tokens = call_with_non_none_params(
        generate_text_semantic,
        text=text,
        history_prompt=history_prompt,
        temp=semantic_temp,
        top_k=kwargs.get("semantic_top_k", None),
        top_p=kwargs.get("semantic_top_p", None),
        silent=silent,
        min_eos_p = kwargs.get("semantic_min_eos_p", None),
        max_gen_duration_s = kwargs.get("semantic_max_gen_duration_s", None),
        allow_early_stop = kwargs.get("semantic_allow_early_stop", True),
        use_kv_caching=kwargs.get("semantic_use_kv_caching", True),
    )
    
    if gradio_try_to_cancel:
        done_cancelling = True
        return None, None

    ## Coarse Options
    coarse_temp = waveform_temp
    # Truthiness check: a coarse_temp of 0 or 0.0 falls back to waveform_temp.
    if kwargs.get("coarse_temp", None):
        coarse_temp = kwargs.get("coarse_temp")

    coarse_seed = kwargs.get("coarse_seed",None)
    if coarse_seed is not None:
        generation.set_seed(coarse_seed)
        
    
    if gradio_try_to_cancel:
        done_cancelling = True
        return None, None
    
    coarse_tokens = call_with_non_none_params(
        generate_coarse,
        x_semantic=semantic_tokens,
        history_prompt=history_prompt,
        temp=coarse_temp,
        top_k=kwargs.get("coarse_top_k", None),
        top_p=kwargs.get("coarse_top_p", None),
        silent=silent,
        max_coarse_history=kwargs.get("coarse_max_coarse_history", None),
        sliding_window_len=kwargs.get("coarse_sliding_window_len", None),
        # NOTE(review): key is "coarse_kv_caching", whereas the semantic stage
        # reads "semantic_use_kv_caching" — confirm the naming is intentional.
        use_kv_caching=kwargs.get("coarse_kv_caching", True),
    )

    # Unlike the other stages, the fine temperature defaults to 0.5 here when the key is absent.
    fine_temp = kwargs.get("fine_temp", 0.5)

    fine_seed = kwargs.get("fine_seed",None)
    if fine_seed is not None:
        generation.set_seed(fine_seed)

    if gradio_try_to_cancel:
        done_cancelling = True
        return None, None
    fine_tokens = call_with_non_none_params(
        generate_fine,
        x_coarse_gen=coarse_tokens,
        history_prompt=history_prompt,
        temp=fine_temp,
        silent=silent,
    )

    # do we ever care about setting this seed? Probably not? You can always just decode it again

    if gradio_try_to_cancel:
        done_cancelling = True
        return None, None
    audio_arr = codec_decode(fine_tokens)
    # Same three keys that the notebook elsewhere reads back from saved speaker .npz files.
    full_generation = {
        "semantic_prompt": semantic_tokens,
        "coarse_prompt": coarse_tokens,
        "fine_prompt": fine_tokens,
    }

    if gradio_try_to_cancel:
        done_cancelling = True
        return None, None
    
    hoarder_mode = kwargs.get("hoarder_mode", None)
    total_segments = kwargs.get("total_segments", 1)
    # Only write per-segment output when this call is one of several segments.
    if hoarder_mode and (total_segments > 1):
        kwargs["text"] = text
        write_one_segment(audio_arr, full_generation, **kwargs)

    if output_full:
        return full_generation, audio_arr
    
    return audio_arr

In [60]:

generation.set_seed(37)
#kwargs['history_prompt'] = 'custom_speakers/en_fiery.npz'



#fullgen, gen_audio_orig_segs  = api.generate_audio(text, output_full=True, **kwargs)


fullgen, gen_audio_orig_segs  = api.generate_audio(text, output_full=True)

100%|██████████| 100/100 [00:04<00:00, 20.12it/s]


100%|██████████| 20/20 [00:13<00:00,  1.47it/s]


In [52]:

Audio(gen_audio_orig_segs, rate=generation.SAMPLE_RATE)



In [61]:
for k in fullgen:
    print(f"k: {k}")
    print(f"len: {len(fullgen[k])}")

In [31]:
api.render_npz_samples("custom_speakers", start_from="semantic")

Rendering samples for speakers in: custom_speakers
  Rendering audio for custom_speakers/hark_woman.npz to custom_speakers/hark_woman_1.wav


  Rendering audio for custom_speakers/hark1.npz to custom_speakers/hark1_1.wav


  Rendering audio for custom_speakers/en_fiery.npz to custom_speakers/en_fiery_1.wav


In [38]:
Audio(gen_audio_orig_segs, rate=generation.SAMPLE_RATE)

In [None]:


def generate_audio(
    text: str,
    history_prompt: Optional[Union[Dict, str]] = None,
    text_temp: float = 0.7,
    waveform_temp: float = 0.7,
    silent: bool = False,
    output_full: bool = False,
):
    """Turn text into audio in two steps: text -> semantic tokens -> waveform.

    Args:
        text: text to be turned into audio
        history_prompt: history choice for audio cloning
        text_temp: temperature for the text-to-semantic step
            (1.0 more diverse, 0.0 more conservative)
        waveform_temp: temperature for the semantic-to-waveform step
            (1.0 more diverse, 0.0 more conservative)
        silent: disable progress bar
        output_full: also return the full generation, usable as a history prompt

    Returns:
        ``(full_generation, audio_arr)`` when ``output_full`` is true, otherwise
        just the numpy audio array at sample frequency 24khz.
    """
    # Options shared by both stages.
    shared = dict(history_prompt=history_prompt, silent=silent)

    semantic_tokens = text_to_semantic(text, temp=text_temp, **shared)
    result = semantic_to_waveform(
        semantic_tokens,
        temp=waveform_temp,
        output_full=output_full,
        **shared,
    )

    if not output_full:
        return result

    full_generation, audio_arr = result
    return full_generation, audio_arr

## ADDED BELOW

In [17]:
Audio(audio_arr_segments_barki, rate=generation.SAMPLE_RATE)

In [49]:
kwargs['history_prompt'] = 'custom_speakers/hark_woman.npz'

In [None]:
print(kwargs)

In [45]:
api.render_npz_samples("custom_speakers", start_from="semantic_prompt")

Rendering samples for speakers in: custom_speakers


100%|██████████| 29/29 [00:25<00:00,  1.15it/s]


  Rendering audio for custom_speakers/hark1.npz to custom_speakers/hark1_1.wav


100%|██████████| 37/37 [00:33<00:00,  1.11it/s]


  Rendering audio for custom_speakers/hark3.npz to custom_speakers/hark3_1.wav


100%|██████████| 35/35 [00:31<00:00,  1.09it/s]


  Rendering audio for custom_speakers/en_fiery.npz to custom_speakers/en_fiery_1.wav


In [None]:
Audio(audio_arr_segments_barki, rate=generation.SAMPLE_RATE) 

In [None]:
generation.preload_models()

In [None]:
text = """
Hark! I, the phantom visage of Edward Teach, rise from the 
abyss, forever bound to the briny depths. With me, brave the tumultuous seas, claim treasures untold,
and send foes to their watery doom.
"""

In [None]:
history_prompt = np.load("pirates/base/pirate.npz")
from rich import inspect

In [None]:
for key in history_prompt.keys():
    length = len(history_prompt[key])
    print(f"key: {key}, length: {length}")
    inspect(history_prompt[key], title=f"{key} ({length})")


In [None]:
# NOTE(review): x_semantic_history and x_semantic are not defined in any visible
# cell of this notebook, so this line raises NameError on a fresh kernel.
new_semantic = np.hstack([x_semantic_history, x_semantic]).astype(np.int32)

# Keep only the first half of the saved semantic prompt.
# NOTE(review): the note after this cell asks for the *last* half — confirm which is intended.
semantic_prompt = history_prompt["semantic_prompt"]
midpoint = len(semantic_prompt) // 2
new_semantic_first_half = semantic_prompt[:midpoint].astype(np.int32)


Instead, I would like new_semantic to be half the size of x_semantic_history: just the last half of it.

In [None]:
    if length > 0:
        for sub_key in history_prompt[key].keys():
            print(f"  {sub_key}={history_prompt[key][sub_key]}")


In [None]:

import random
import traceback

# Number of split points sampled along the semantic prompt. Each split point is
# rendered twice: once from the part before it and once from the part after it.
gen_minor_variants = 20

npz_file = "pirate.npz"
npz_directory = "pirates/base"
npz_filepath = "pirates/base/pirate.npz"

semantic_prompt = history_prompt["semantic_prompt"]
original_semantic_prompt = semantic_prompt.copy()
starting_point = 128
ending_point = len(semantic_prompt) - starting_point

points = np.linspace(starting_point, ending_point, gen_minor_variants)

# Running counter used in the output file names. The original referenced an
# undefined `i`, so every render raised NameError inside the try block and no
# file was ever written.
variant_index = 0

# Dedicated loop names so the loops do not overwrite `starting_point` and
# `semantic_prompt` defined above.
for split_point in points:
    split_point = int(split_point)
    print(split_point)

    new_semantic_from_beginning = original_semantic_prompt[:split_point].astype(np.int32)
    new_semantic_from_ending = original_semantic_prompt[split_point:].astype(np.int32)

    for semantic_variant in [new_semantic_from_beginning, new_semantic_from_ending]:
        variant_index += 1

        print(f"len(semantic_prompt): {len(semantic_variant)}")
        print(f"starting_point: {split_point}, ending_poinst: {ending_point}")

        # Randomize the coarse-stage sampling options for each variant;
        # top_k and top_p are each left unset about a third of the time.
        temp_coarse = random.uniform(0.5, 0.9)
        top_k_coarse = None if random.random() < 1/3 else random.randint(50, 100)
        top_p_coarse = None if random.random() < 1/3 else random.uniform(0.8, 0.95)

        max_coarse_history_options = [630, random.randint(500, 630), random.randint(60, 500)]
        max_coarse_history = random.choice(max_coarse_history_options)

        coarse_tokens = generation.generate_coarse(semantic_variant, temp=temp_coarse, top_k=top_k_coarse, top_p=top_p_coarse, max_coarse_history=max_coarse_history)

        temp_fine = random.uniform(0.3, 0.7)
        fine_tokens = generation.generate_fine(coarse_tokens, temp=temp_fine)

        history_prompt_render_variant = {"semantic_prompt": semantic_variant, "coarse_prompt": coarse_tokens, "fine_prompt": fine_tokens}

        try:
            audio_arr = generation.codec_decode(fine_tokens)
            base_output_filename = os.path.splitext(npz_file)[0] + f"_var_{variant_index}.wav"
            output_filepath = os.path.join(npz_directory, base_output_filename)
            output_filepath = api.generate_unique_filepath(output_filepath)
            print(f"  Rendering minor variant voice audio for {npz_filepath} to {output_filepath}")
            api.write_seg_wav(output_filepath, audio_arr)

            api.write_seg_npz(output_filepath, history_prompt_render_variant)
        except Exception:
            # Still best-effort (keep rendering the remaining variants), but show
            # the cause instead of swallowing it, and let KeyboardInterrupt through.
            print(f"  <Error rendering audio for {npz_filepath}>")
            traceback.print_exc()

### Set Text and Other Generation Options

In [None]:
text = """
Hey, have you heard about this new text-to-audio model called "Bark"? 
It's like rain on your wedding day. It's a free ride when you've already paid. It's the good advice that you just didn't take.
And who would've thought? It figures.

Well, life has a funny way of sneaking up on you. When you think everything's okay and everything's going right. 
And life has a funny way of helping you out. When you think everything's gone wrong. 
And everything blows up in your face.

It's a traffic jam when you're already late. A "No smoking" sign on your cigarette break.
It's like ten thousand spoons when all you need is a knife. It's meeting the man of my dreams.
And then meeting his beautiful wife.

And isn't it ironic? Don't you think? A little too ironic.
And yeah, I really do think.
"""

# kwargs holds whatever config.load_all_defaults() returns; the assignments
# below override individual entries for the long-form run.
# To control text splitting, set split_character_goal_length and split_character_max_length.
kwargs = {}

kwargs = config.load_all_defaults()
kwargs['text_prompt'] = text
kwargs['hoarder_mode'] = True
kwargs["output_dir"] = 'bark_samples'
kwargs["history_prompt"] = None
# kwargs["single_starting_seed"] = None #
# If you set a seed, you may want to call generation.set_seed(-1) manually afterwards to disable the deterministic generation settings.
# I'm not cleaning up after this parameter at the moment, and I'm not sure about other side effects.
kwargs["stable_mode_interval"] = 1 # 0 for continuous, 2,3,4 for mixed
kwargs["split_character_goal_length"] = 90
kwargs["split_character_max_length"] = 130
# kwargs["output_iterations"] = 1
kwargs["add_silence_between_segments"] = .025 # See: https://github.com/suno-ai/bark/blob/main/notebooks/long_form_generation.ipynb but not great for songs or stable_mode_interval 0
kwargs["semantic_min_eos_p"] = 0.05 # 0.20 is the default; lower means more likely to stop


# Not sure yet what overall effect these have, but for example:
kwargs["semantic_top_k"] = 50
kwargs["semantic_top_p"] = 0.95

### First Attempt

#### Before we run, let's double-check our settings

In [None]:
kwargs["dry_run"] = True # Check how the text is being split, don't actually run the model. 
full_generation_segments, audio_arr_segments, final_filename_will_be = api.generate_audio_long(**kwargs)

In [None]:
# That's the output we expect to see: we haven't generated any audio yet (dry_run is still True).
# These text segments look a little small, so let's try larger split lengths instead.
kwargs["split_character_goal_length"] = 110
kwargs["split_character_max_length"] = 175

full_generation_segments, audio_arr_segments, final_filename_will_be = api.generate_audio_long(**kwargs)

#### Run Bark

In [None]:
# These segment sizes look better, so now set dry_run to False to run for real.
# Because we set hoarder_mode, we can see the wav files for each segment in the Colab File Manager.

kwargs["dry_run"] = False
full_generation_segments, audio_arr_segments, final_filename_will_be = api.generate_audio_long(**kwargs)

In [None]:
print(f"  final wav at {final_filename_will_be}  ")
# We see many wav files because we set hoarder_mode, but one file will be the final product.
# Set hoarder_mode=False if you just want the final wav and aren't in explore mode.

# Or play it here: the per-segment arrays are joined into a single clip.
Audio(np.concatenate(audio_arr_segments), rate=generation.SAMPLE_RATE) 


In [None]:
# Because we set hoarder mode, each segment was also saved as its own separate sample alongside its wav.

!find "bark_samples/" -name "*.npz"

display_wav_files("bark_samples/")

### Second Attempt. Can we do better?

In [None]:
# We used stable_mode_interval = 1, so the history_prompt does not evolve between segments.
# Even so, the voices saved for each segment are one generation removed from the original history prompt.
# This means they are a *little* bit different, and we may prefer one of them over the original.
# For example, maybe segment 2 was a little clearer, or had a particular emotion; we could use that segment's version as the speaker.
# In the particular run I'm doing now, that segment ended with a bit of an interesting accent. I'm curious whether I can bring that out more.

# (We should probably rename the file to something sensible, though.)

kwargs["history_prompt"] = "/content/bark/bark_samples/Hey_have_you_heard_a-SPK-random.wav/002_Its_the_good_advice_-SPK-random.wav.npz"

In [None]:
kwargs["text_prompt"] = f"I'm speaker number two. I'm the best speaker. Also I'm a free spirit. Let me evolve my voice with every step. Here's my version."
kwargs["text_prompt"] += text
kwargs["stable_mode_interval"] = 0 
kwargs["output_dir"] = "speaker_2_test"
kwargs["add_silence_between_segments"] = 0.0 # No silence, fully merge clips

kwargs["semantic_min_eos_p"] = 0.20 # Back to default, let Bark umm and ahh a bit
full_generation_segments, audio_arr_segments, final_filename_will_be = api.generate_audio_long(**kwargs)



In [None]:
print(f"  final wave at {final_filename_will_be}")
Audio(np.concatenate(audio_arr_segments), rate=generation.SAMPLE_RATE) 

In [None]:
# This clip probably got really weird after a few segments, because it is fully feeding back into itself. So kwargs["stable_mode_interval"] = 3 might be a good compromise.

display_wav_files("speaker_2_test")

### Finding Our Voice

In [None]:
# That final clip is an improvement. The random voice we got isn't bad, but it's not quite doing our beautiful prose justice.
# We could use an existing history_prompt, but let's try to summon a perfect speaker from the model instead.
# We do that by generating many speakers randomly.
# We could use our first segment's text, but in my experience there is a better method:
# try to imagine what type of text the voice I want to hear would be likely to appear in,
# then generate 20 sample clips from that text.

# TODO

In [None]:

text="""
How many fucken morons can wave his arms and keep people in tempo? 
I was there to push people beyond what's expected of them. 
I believe that is an absolute necessity. 
Otherwise we're depriving the world of the next Louis Armstrong, 
or the next Charlie Parker. 
Have I told you that story about how Charlie Parker became Charlie Parker?
Parker's a young kid, pretty good on the Sax, 
gets up to play at a cutting session, 
and well, he fucks it up. 
And Jones nearly decapitates him for it, throws a cymbal at his head. 
And Charlie's laughed off stage. Cries himself to sleep that night. 
But the next morning, what does he do? He practices. 
And he practices and he practices and he practices. 
With one goal in mind - never to be laughed at again. 
And a year later he goes back to the Reno, 
and he steps up on that stage 
and he plays the best motherfucken solo the world has ever heard. 
So imagine if Jones had just said, 
"Well that's okay, Charlie, that was alright. Good job.” 
And Charlie thinks to himself, “Well, shit I did do a pretty good job.” 
End of story. That to me is an absolute tragedy. 
But that's just what the world wants now. 
No wonder Jazz is dying.

[He takes a sip of his drink.]
I tell you man, every Starbucks “Jazz” album, just proves my point really 
- there are no two words more harmful in the English language than “Good job”.

The truth is Andrew I… never really had a Charlie Parker. 
But I tried. I actually fucking tried. 
And that's more than most people ever do, 
and I will never apologise for how I tried.
"""

In [None]:
import re
def apply_rule_to_prompt(regex, replacement, flags, text):
    """Apply one regex substitution rule to a prompt.

    Args:
        regex: pattern to search for.
        replacement: replacement text (or callable) passed to ``re.sub``.
        flags: container or string of flag names; only 'MULTILINE' is recognised.
        text: the prompt to rewrite.

    Returns:
        ``text`` with every match of ``regex`` replaced.
    """
    re_flags = re.MULTILINE if 'MULTILINE' in flags else 0
    return re.sub(regex, replacement, text, flags=re_flags)

In [None]:
# Replace sentence punctuation (period, question mark, comma) with spaces.
# Raw string: "\." in a normal string literal is an invalid escape sequence,
# which newer Python versions warn about; the pattern itself is unchanged.
regex = r"[\.?,]"
replacement = " "
flags = "MULTILINE"

print(apply_rule_to_prompt(regex, replacement, flags, text))

In [None]:
from typing import List
import re
import random 
from typing import Dict, Optional, Union
import logging
logger = logging.getLogger(__name__)
def split_text(text: str, split_type: Optional[str] = None, split_type_quantity = 1, split_type_string: Optional[str] = None, split_type_value_type: Optional[str] = None) -> List[str]:
    """Split text into segments that each hold about ``split_type_quantity`` units.

    The text is first cut into single pieces of ``split_type`` ('word', 'line',
    'string' or 'regex'). Pieces are then accumulated into a segment until the
    segment contains at least ``split_type_quantity`` units of
    ``split_type_value_type``, e.g. split by 'line' but close a segment once it
    holds 20 'word's.

    Args:
        text: the text to split.
        split_type: unit used to cut the text: 'word', 'line', 'string', 'regex'.
        split_type_quantity: number of units per segment (values below 1 are treated as 1).
        split_type_string: separator for 'string' (literal) or pattern for 'regex'.
        split_type_value_type: unit used for counting; defaults to ``split_type``
            when omitted, so the old "groups of N lines" syntax keeps working.

    Returns:
        The list of segments. A trailing segment that did not reach the
        requested quantity is kept rather than dropped. For an empty text,
        an unsupported ``split_type``, or a missing ``split_type_string``
        where one is required, ``[text]`` is returned unchanged.
    """
    if text == '':
        return [text]

    # The parameter defaults to None, so test for "not given" rather than for ''.
    if not split_type_value_type:
        split_type_value_type = split_type

    # 'phrase' splitting (spaCy) is not implemented.

    if split_type in ('string', 'regex') and split_type_string is None:
        logger.warning(
            f"Splitting by {split_type} requires a string to split by. Returning original text.")
        return [text]

    split_type_to_function = {
        'word': split_by_words,
        'line': split_by_lines,
        # 'sentence': split_by_sentences,
        'string': split_by_string,
        # 'random': split_by_random,
        # 'rhyme': split_by_rhymes,
        # 'pos': split_by_part_of_speech,
        'regex': split_by_regex,
    }

    if split_type not in split_type_to_function:
        logger.warning(
            f"Splitting by {split_type} not a supported option. Returning original text.")
        return [text]

    splitter = split_type_to_function[split_type]
    # Count with the splitter of the *value* type; fall back to the split type
    # if the value type has no splitter of its own.
    counter = split_type_to_function.get(split_type_value_type, splitter)
    # Words come back without surrounding whitespace, so re-join them with a
    # space; every other piece type already carries its own separator.
    joiner = ' ' if split_type == 'word' else ''
    quantity = max(1, int(split_type_quantity or 1))

    pieces = splitter(text, split_type=split_type, split_type_quantity=1, split_type_string=split_type_string, split_type_value_type=split_type_value_type)

    final_segmented_text = []
    current_segment = ''
    for piece in pieces:
        if not piece:
            # e.g. the empty leading piece when the text starts with the separator
            continue
        current_segment = piece if not current_segment else current_segment + joiner + piece
        units_found = len(counter(current_segment, split_type=split_type_value_type, split_type_quantity=1, split_type_string=split_type_string))
        if units_found >= quantity:
            final_segmented_text.append(current_segment)
            current_segment = ''

    # Keep whatever is left over instead of silently discarding the tail of the text.
    if current_segment:
        final_segmented_text.append(current_segment)

    return final_segmented_text

def split_by_string(text: str, split_type: Optional[str] = None, split_type_quantity: Optional[int] = 1, split_type_string: Optional[str] = None, split_type_value_type: Optional[str] = None) -> List[str]:
    """Cut text before every occurrence of the literal ``split_type_string``.

    The separator stays attached to the start of the piece that follows it, so
    concatenating the pieces reproduces ``text``. The first piece is whatever
    precedes the first occurrence (possibly ''). Without a separator the text
    is split on whitespace; an empty separator returns ``[text]``.

    ``split_type``, ``split_type_quantity`` and ``split_type_value_type`` are
    accepted only so all splitters share one signature; they are ignored.
    """
    if split_type_string is None:
        return text.split()
    if split_type_string == '':
        return [text]

    # Escape the separator: it is a literal string, not a pattern ('regex' is a
    # separate split type). Unescaped metacharacters or groups would also break
    # the [text, sep, text, sep, ...] layout relied on below.
    split_list = re.split(f"({re.escape(split_type_string)})", text)
    result = [split_list[0]]
    for i in range(1, len(split_list), 2):
        result.append(split_list[i] + split_list[i+1])
    return result

def split_by_regex(text: str, pattern: Optional[str] = None, split_type: Optional[str] = None, split_type_quantity = 1, split_type_string: Optional[str] = None, split_type_value_type: Optional[str] = None) -> List[str]:
    """Cut text at the start of every match of a regular expression.

    Each match begins a new chunk, and chunks are stripped of surrounding
    whitespace. ``pattern`` may be given directly or, when called through
    ``split_text``, via ``split_type_string``. Without any pattern ``[text]``
    is returned.

    The remaining parameters exist so this function can be called with the
    same keyword arguments as the other splitters; they are ignored.
    """
    if pattern is None:
        pattern = split_type_string
    if pattern is None:
        return [text]

    chunks = []
    start = 0

    for match in re.finditer(pattern, text):
        end = match.start()
        chunks.append(text[start:end].strip())
        start = end

    chunks.append(text[start:].strip())
    return chunks


def split_by_words(text: str, split_type: Optional[str] = None, split_type_quantity = 1, split_type_string: Optional[str] = None, split_type_value_type: Optional[str] = None) -> List[str]:
    """Return the whitespace-separated words of ``text``, one per list element.

    Grouping words into larger segments is done by ``split_text``; the extra
    parameters are accepted for signature uniformity and ignored.
    """
    return text.split()


def split_by_lines(text: str, split_type: Optional[str] = None, split_type_quantity = 1, split_type_string: Optional[str] = None, split_type_value_type: Optional[str] = None) -> List[str]:
    """Return the non-blank lines of ``text``, each terminated with a newline.

    Grouping lines into larger segments is done by ``split_text``; the extra
    parameters are accepted for signature uniformity and ignored.
    """
    return [line + '\n' for line in text.split('\n') if line.strip()]

"""
def split_by_sentences(text: str, n: int, language="en") -> List[str]:
    seg = pysbd.Segmenter(language=language, clean=False)
    sentences = seg.segment(text)
    return [' '.join(sentences[i:i + n]) for i in range(0, len(sentences), n)]
"""

def load_text(file_path: str) -> Union[str, None]:
    """Read a UTF-8 text file and return its contents.

    Args:
        file_path: path of the file to read.

    Returns:
        The file contents, or None if the file could not be read. Failures are
        logged (missing file, permission problem, or any other error) rather
        than raised.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as handle:
            loaded = handle.read()
        logger.info(f"Successfully loaded the file: {file_path}")
        return loaded
    except Exception as e:
        # Pick the log message that matches the failure; anything unrecognised
        # is reported together with the exception text.
        if isinstance(e, FileNotFoundError):
            logger.error(f"File not found: {file_path}")
        elif isinstance(e, PermissionError):
            logger.error(f"Permission denied to read the file: {file_path}")
        else:
            logger.error(
                f"An unexpected error occurred while reading the file: {file_path}. Error: {e}")
        return None


# Good for just exploring random voices, rethink api
"""
def split_by_random(text: str, split_type: Optional[str] = None, split_type_quantity = 1, split_type_string: Optional[str] = None, split_type_value_type: Optional[str] = None) -> List[str]:
    segments = split_text(text, split_type_string)
    chunks = []
    min_len = max(1, split_type_quantity - 2)
    max_len = split_type_quantity + 2
    while words:
        chunk_len = random.randint(min_len, max_len)
        chunk = ' '.join(words[:chunk_len])
        chunks.append(chunk)
        words = words[chunk_len:]
    return chunks
"""

In [None]:
print(split_by_lines(text))

In [None]:
split_text(text="A dig whe sdf lskfj.", split_type="word")

In [None]:
print(split_text(text, split_type="line", split_type_quantity=4, split_type_value_type="line"))


In [None]:
text = """
It's like rain on your wedding day.
It's a free ride when you've already paid.
It's the good advice that you just didn't take.
And who would've thought? It figures.
It's like rain on your wedding day.
It's a free ride when you've already paid.
It's the good advice that you just didn't take.
And who would've thought? It figures.
"""

result = split_text(text, split_type="line", split_type_quantity=3)
print(result)

result = split_text(text, split_type="word", split_type_quantity=4)
print(result)

In [None]:
result = split_text(text, split_type="string", split_type_quantity=23, split_type_string = "the")
print(result)

In [None]:
print(split_by_string("The dog went to the dog store and ate", split_type="string", split_type_quantity=23, split_type_string="dog"))


In [None]:
split_text(text, split_type="line", split_type_quantity=3, split_type_value_type="word")