import download_missing_booknlp_models |
import re |
import glob |
def process_large_numbers_in_txt(file_path):
    """Strip thousands separators from numbers in a text file, in place.

    Every number written with comma-separated groups of three digits
    (for example ``1,234,567``) is rewritten without the commas
    (``1234567``) and the file is overwritten with the result.

    Args:
        file_path: Path of the text file to rewrite.
    """
    # Book text is written as UTF-8 elsewhere in this file, so read and write
    # it as UTF-8 here too instead of relying on the platform default.
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()
    pattern = r'\b\d{1,3}(,\d{3})+\b'
    modified_content = re.sub(pattern, lambda m: m.group().replace(',', ''), content)
    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(modified_content)
import pandas as pd |
def remove_empty_text_rows(csv_file):
    """Rewrite *csv_file* without the rows whose 'Text' cell is missing or empty."""
    frame = pd.read_csv(csv_file)
    has_value = frame['Text'].notna()
    not_blank = frame['Text'] != ''
    frame = frame[has_value & not_blank]
    frame.to_csv(csv_file, index=False)
    print(f"Rows with empty 'Text' column have been removed from {csv_file}")
import pandas as pd |
def process_and_split_csv(file_path, split_string):
    """Split CSV rows at every occurrence of *split_string* in their 'Text' cell.

    The CSV at *file_path* is rewritten in place. A row whose text contains
    the marker becomes several rows: the text before the first marker, then
    one row per marker (each starting with the marker itself). 'Start
    Location' and 'End Location' are recomputed from the original start
    location and the character offsets of the pieces.
    """

    def split_text(text, split_string, original_row):
        # Offset of the first marker inside the text; advanced as pieces are emitted.
        offset = text.find(split_string)
        base = original_row['Start Location']
        marker_len = len(split_string)
        pieces = []
        for position, fragment in enumerate(text.split(split_string)):
            piece = original_row.copy()
            if position == 0:
                piece['Text'] = fragment
                piece['End Location'] = base + offset
            else:
                piece['Text'] = split_string + fragment
                piece['Start Location'] = base + offset
                piece['End Location'] = base + offset + marker_len + len(fragment)
                offset += marker_len + len(fragment)
            pieces.append(piece)
        return pieces

    def process_csv(df, split_string):
        expanded = []
        for _, record in df.iterrows():
            cell = record['Text']
            needs_split = isinstance(cell, str) and split_string in cell
            if needs_split:
                expanded += split_text(cell, split_string, record)
            else:
                expanded.append(record)
        return pd.DataFrame(expanded)

    frame = pd.read_csv(file_path)
    process_csv(frame, split_string).to_csv(file_path, index=False)
import os |
import subprocess |
import ebooklib |
from ebooklib import epub |
from bs4 import BeautifulSoup |
import re |
import csv |
import nltk |
import shutil |
def create_chapter_labeled_book(ebook_file_path):
    """Turn an ebook into chapter text files, a sentence CSV and a combined text file.

    All paths are relative to the current working directory:

    1. ``ebook-convert`` converts *ebook_file_path* to ``Working_files/temp.epub``.
    2. Every non-empty document item of that EPUB is written to
       ``Working_files/temp_ebook/chapter_N.txt``; an item shorter than 2300
       characters is appended to the previously written chapter instead.
    3. Each chapter is sentence-tokenised into
       ``Working_files/Book/Other_book.csv`` (every row attributed to
       "Narrator", chapter text prefixed with ``NEWCHAPTERABC``).
    4. The chapters are concatenated into
       ``Working_files/Book/Chapter_Book.txt`` separated by
       ``NEWCHAPTERABC`` lines.

    Args:
        ebook_file_path: Path of the ebook to convert.
    """

    def ensure_directory(directory_path):
        """Create *directory_path* if it does not exist yet."""
        if not os.path.exists(directory_path):
            os.makedirs(directory_path)
            print(f"Created directory: {directory_path}")

    ensure_directory('Working_files/Book')

    def convert_to_epub(input_path, output_path):
        """Run ebook-convert; return True on success, False if it exits with an error."""
        try:
            subprocess.run(['ebook-convert', input_path, output_path], check=True)
        except subprocess.CalledProcessError as e:
            print(f"An error occurred while converting the eBook: {e}")
            return False
        return True

    def save_chapters_as_text(epub_path):
        """Write the EPUB's document items to numbered chapter text files."""
        directory = "Working_files/temp_ebook"
        # Start from an empty folder so chapters of a previous book cannot leak in.
        if os.path.exists(directory):
            shutil.rmtree(directory)
        ensure_directory(directory)
        book = epub.read_epub(epub_path)
        previous_filename = ''
        chapter_counter = 0
        for item in book.get_items():
            if item.get_type() != ebooklib.ITEM_DOCUMENT:
                continue
            soup = BeautifulSoup(item.get_content(), 'html.parser')
            text = soup.get_text()
            if not text.strip():
                continue
            if len(text) < 2300 and previous_filename:
                # Short items are merged into the chapter written just before them.
                with open(previous_filename, 'a', encoding='utf-8') as file:
                    file.write('\n' + text)
            else:
                previous_filename = os.path.join(directory, f"chapter_{chapter_counter}.txt")
                chapter_counter += 1
                with open(previous_filename, 'w', encoding='utf-8') as file:
                    file.write(text)
                print(f"Saved chapter: {previous_filename}")

    input_ebook = ebook_file_path
    output_epub = 'Working_files/temp.epub'
    if os.path.exists(output_epub):
        os.remove(output_epub)
        print(f"File {output_epub} has been removed.")
    else:
        print(f"The file {output_epub} does not exist.")
    if convert_to_epub(input_ebook, output_epub):
        save_chapters_as_text(output_epub)
    nltk.download('punkt')

    def chapter_number(filename):
        """Return N for a name of the form 'chapter_N.txt', otherwise None."""
        match = re.fullmatch(r'chapter_(\d+)\.txt', filename)
        return int(match.group(1)) if match else None

    def process_chapter_files(folder_path, output_csv):
        """Write one CSV row per sentence of every chapter file in *folder_path*."""
        with open(output_csv, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(['Text', 'Start Location', 'End Location', 'Is Quote', 'Speaker', 'Chapter'])
            # Select the chapter files before sorting, so a stray file in the
            # folder cannot crash the numeric sort key.
            chapter_files = sorted(
                (name for name in os.listdir(folder_path) if chapter_number(name) is not None),
                key=chapter_number,
            )
            for filename in chapter_files:
                number = chapter_number(filename)
                file_path = os.path.join(folder_path, filename)
                try:
                    with open(file_path, 'r', encoding='utf-8') as file:
                        text = file.read()
                    if text:
                        text = "NEWCHAPTERABC" + text
                    sentences = nltk.tokenize.sent_tokenize(text)
                    # Search from the end of the previous sentence so that a
                    # repeated sentence is located at its own position rather
                    # than at its first occurrence.
                    cursor = 0
                    for sentence in sentences:
                        start_location = text.find(sentence, cursor)
                        if start_location == -1:
                            start_location = text.find(sentence)
                        end_location = start_location + len(sentence)
                        if start_location != -1:
                            cursor = max(cursor, end_location)
                        writer.writerow([sentence, start_location, end_location, 'True', 'Narrator', number])
                except Exception as e:
                    # Best effort: a broken chapter is reported and skipped.
                    print(f"Error processing file {filename}: {e}")

    folder_path = "Working_files/temp_ebook"
    output_csv = 'Working_files/Book/Other_book.csv'
    process_chapter_files(folder_path, output_csv)

    def wipe_folder(folder_path):
        """Delete the regular files directly inside *folder_path* (sub-directories are kept)."""
        if not os.path.exists(folder_path):
            print(f"The folder {folder_path} does not exist.")
            return
        for filename in os.listdir(folder_path):
            file_path = os.path.join(folder_path, filename)
            if os.path.isfile(file_path):
                try:
                    os.remove(file_path)
                    print(f"Removed file: {file_path}")
                except Exception as e:
                    print(f"Failed to remove {file_path}. Reason: {e}")
            else:
                print(f"Skipping directory: {file_path}")

    def sort_key(filename):
        """Extract chapter number for sorting."""
        match = re.search(r'chapter_(\d+)\.txt', filename)
        return int(match.group(1)) if match else 0

    def combine_chapters(input_folder, output_file):
        """Concatenate the chapter files into *output_file*, separated by marker lines."""
        os.makedirs(os.path.dirname(output_file), exist_ok=True)
        files = [f for f in os.listdir(input_folder) if f.endswith('.txt')]
        sorted_files = sorted(files, key=sort_key)
        # The chapter files were written as UTF-8 above; use the same encoding here.
        with open(output_file, 'w', encoding='utf-8') as outfile:
            for i, filename in enumerate(sorted_files):
                with open(os.path.join(input_folder, filename), 'r', encoding='utf-8') as infile:
                    outfile.write(infile.read())
                if i < len(sorted_files) - 1:
                    outfile.write("\nNEWCHAPTERABC\n")

    input_folder = 'Working_files/temp_ebook'
    output_file = 'Working_files/Book/Chapter_Book.txt'
    combine_chapters(input_folder, output_file)
    ensure_directory('Working_files/Book')
import os |
import subprocess |
import tkinter as tk |
from tkinter import filedialog, messagebox |
from epub2txt import epub2txt |
from booknlp.booknlp import BookNLP |
import nltk |
import re |
# Fetch the NLTK part-of-speech tagger data; nltk.pos_tag is used further down
# in this file to decide whether a mention is a pronoun.
nltk.download('averaged_perceptron_tagger')
# Module-level defaults. process_file_headless assigns same-named *local*
# variables (it has no `global` statement), so these values are not changed by it.
epub_file_path = ""
chapters = []
ebook_file_path = ""
input_file_is_txt = False
def convert_epub_and_extract_chapters(epub_path):
    """Run ``ebook-convert`` on *epub_path* and collect the chapter titles it reports.

    The converter's combined stdout/stderr is echoed line by line; every line
    matching ``Detected chapter: * <title>`` contributes ``<title>`` to the
    returned list. The converted output itself is sent to ``/dev/null``.
    """
    chapter_pattern = re.compile(r'Detected chapter: \* (.*)')
    process = subprocess.Popen(
        ['ebook-convert', epub_path, '/dev/null'],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True,
    )
    chapter_names = []
    while True:
        line = process.stdout.readline()
        if line == '':
            # An empty read means the pipe reached end-of-file.
            break
        print(line, end='')
        found = chapter_pattern.search(line)
        if found is not None:
            chapter_names.append(found.group(1))
    process.stdout.close()
    process.wait()
    return chapter_names
def calibre_installed():
    """Report whether Calibre's ``ebook-convert`` command can be launched.

    Returns True when the command starts, and False (after printing install
    instructions) when the executable cannot be found.
    """
    version_command = ['ebook-convert', '--version']
    try:
        subprocess.run(version_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except FileNotFoundError:
        print("""ERROR NO CALIBRE: running epub2txt convert version...
It appears you dont have the calibre commandline tools installed on your,
This will allow you to convert from any ebook file format:
Calibre supports the following input formats: CBZ, CBR, CBC, CHM, EPUB, FB2, HTML, LIT, LRF, MOBI, ODT, PDF, PRC, PDB, PML, RB, RTF, SNB, TCR, TXT.
If you want this feature please follow online instruction for downloading the calibre commandline tool.
For Linux its:
sudo apt update && sudo apt upgrade
sudo apt install calibre
""")
        return False
    else:
        return True
def get_newest_file_in_input_folder():
    """Return the most recently created entry of ``input_files``, or None if there is none."""
    candidates = glob.glob(os.path.join("input_files", "*"))
    if not candidates:
        print("I couln't find any files there")
        return None
    return max(candidates, key=os.path.getctime)
def convert_with_calibre(file_path, output_format="txt"):
    """Convert a file using Calibre's ebook-convert tool.

    Args:
        file_path: Path of the ebook to convert.
        output_format: Extension (without the dot) of the converted file.

    Returns:
        The path of the converted file: *file_path* with its extension
        replaced by *output_format*.
    """
    # os.path.splitext only strips an extension from the final path component,
    # so a dot in a directory name no longer truncates the output path.
    base, _extension = os.path.splitext(file_path)
    output_path = base + '.' + output_format
    subprocess.run(['ebook-convert', file_path, output_path])
    return output_path
import os |
import subprocess |
import sys |
def process_file_headless():
    """Run the BookNLP pipeline on the file named by the first command-line argument.

    The input is converted to plain text when necessary (through Calibre when
    it is installed, or through epub2txt for EPUB files when it is not),
    thousands separators are removed from the text, and BookNLP writes its
    output to ``Working_files/Book``. With Calibre available the
    chapter-labelled text produced by create_chapter_labeled_book is processed
    instead of the raw text.

    Prints a message and returns without processing when no argument is given,
    the file does not exist, or the format cannot be handled.
    """
    if len(sys.argv) < 2:
        print("No input file was given on the command line.")
        return
    file_path = sys.argv[1]
    # The path comes from argv and cannot change, so a missing file is reported
    # once instead of being re-checked in an endless loop.
    if not os.path.isfile(file_path):
        print("File not found. Please check the path and try again.")
        return

    lowered = file_path.lower()
    input_file_is_txt = lowered.endswith('.txt')
    convertible = lowered.endswith(('.cbz', '.cbr', '.cbc', '.chm', '.epub', '.fb2', '.html', '.lit', '.lrf',
                                    '.mobi', '.odt', '.pdf', '.prc', '.pdb', '.pml', '.rb', '.rtf', '.snb', '.tcr'))
    if not (convertible or input_file_is_txt):
        print("Selected file format is not supported or Calibre is not installed.")
        return

    # Probe for Calibre once; each failed probe prints a long help message.
    has_calibre = calibre_installed()
    if convertible:
        if has_calibre:
            file_path = convert_with_calibre(file_path)
        elif lowered.endswith('.epub'):
            # Without Calibre only EPUB can be converted, via epub2txt.
            content = epub2txt(file_path)
            if not os.path.exists('Working_files'):
                os.makedirs('Working_files')
            file_path = os.path.join('Working_files', 'Book.txt')
            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(content)
        else:
            print("Selected file format is not supported or Calibre is not installed.")
            return

    book_id = "Book"
    output_directory = os.path.join('Working_files', book_id)
    model_params = {
        "pipeline": "entity,quote,supersense,event,coref",
        "model": "big"
    }
    process_large_numbers_in_txt(file_path)
    booknlp = BookNLP("en", model_params)
    if has_calibre:
        create_chapter_labeled_book(file_path)
        booknlp.process('Working_files/Book/Chapter_Book.txt', output_directory, book_id)
        if not input_file_is_txt:
            os.remove(file_path)
            print(f"Deleted file: {file_path} because it's not needed anymore after the ebook conversion to txt")
    else:
        booknlp.process(file_path, output_directory, book_id)
    print("Success, File processed successfully!")


if __name__ == "__main__":
    process_file_headless()
import pandas as pd |
def filter_and_correct_quotes(file_path):
    """Drop every line holding an odd number of double quotes and rewrite the file in place.

    A short summary of how many lines were read, removed and kept is printed.
    """
    with open(file_path, 'r', encoding='utf-8') as source:
        all_lines = source.readlines()

    # A line with an unbalanced '"' is treated as problematic and discarded.
    balanced = [entry for entry in all_lines if entry.count('"') % 2 == 0]

    with open(file_path, 'w', encoding='utf-8') as target:
        target.writelines(balanced)

    removed = len(all_lines) - len(balanced)
    print(f"Processed {len(all_lines)} lines.")
    print(f"Removed {removed} problematic lines.")
    print(f"Wrote {len(balanced)} lines back to the file.")


if __name__ == "__main__":
    file_path = "Working_files/Book/Book.quotes"
    filter_and_correct_quotes(file_path)
import pandas as pd |
import re |
import glob |
import os |
def process_files(quotes_file, tokens_file):
    """Write ``non_quotes.csv`` holding the token text found between consecutive quotes.

    The quotes file is read as TSV; whenever pandas raises a ParserError whose
    message contains ``at row N``, row N is added to the skip list and the read
    is retried. Any other ParserError is printed and the function returns
    without writing anything. For every quote, the tokens whose id lies
    strictly between the previous quote's end and this quote's start are
    joined, de-tokenised and stored as a "Narrator" row. The CSV is written
    next to *quotes_file*.
    """
    skip_rows = []
    df_quotes = None
    while df_quotes is None:
        try:
            df_quotes = pd.read_csv(quotes_file, delimiter="\t", skiprows=skip_rows)
        except pd.errors.ParserError as e:
            found = re.search(r'at row (\d+)', str(e))
            if found is None:
                print(f"Error reading {quotes_file}: {e}")
                return
            problematic_row = int(found.group(1))
            print(f"Skipping problematic row {problematic_row} in {quotes_file}")
            skip_rows.append(problematic_row)

    df_tokens = pd.read_csv(tokens_file, delimiter="\t", on_bad_lines='skip', quoting=3)
    token_ids = df_tokens['token_ID_within_document']
    # Applied in this order to glue tokenised contractions and punctuation back together.
    fixes = (
        (" n't", "n't"),
        (" n’", "n’"),
        ("( ", "("),
        (" ,", ","),
        ("gon na", "gonna"),
        (" n’t", "n’t"),
    )
    previous_end = 0
    collected = []
    for _, quote_row in df_quotes.iterrows():
        start_id = quote_row['quote_start']
        end_id = quote_row['quote_end']
        between = df_tokens[(token_ids > previous_end) & (token_ids < start_id)]
        words_chunk = ' '.join([str(token_row['word']) for _, token_row in between.iterrows()])
        for old, new in fixes:
            words_chunk = words_chunk.replace(old, new)
        # Remove the space in front of any remaining non-alphanumeric character.
        words_chunk = re.sub(r' (?=[^a-zA-Z0-9\s])', '', words_chunk)
        if words_chunk:
            collected.append([words_chunk, previous_end, start_id, "False", "Narrator"])
        previous_end = end_id

    nonquotes_df = pd.DataFrame(
        collected,
        columns=["Text", "Start Location", "End Location", "Is Quote", "Speaker"],
    )
    output_filename = os.path.join(os.path.dirname(quotes_file), "non_quotes.csv")
    nonquotes_df.to_csv(output_filename, index=False)
    print(f"Saved nonquotes.csv to {output_filename}")
def main():
    """Pair every ``.quotes`` file under Working_files with the same-named ``.tokens`` file and process it."""
    quotes_files = glob.glob('Working_files/**/*.quotes', recursive=True)
    tokens_files = glob.glob('Working_files/**/*.tokens', recursive=True)

    def stem(path):
        # File name without directory and extension.
        return os.path.splitext(os.path.basename(path))[0]

    for q_file in quotes_files:
        wanted = stem(q_file)
        partner = next((t_file for t_file in tokens_files if stem(t_file) == wanted), None)
        if partner is not None:
            process_files(q_file, partner)
    print("All processing complete!")


if __name__ == "__main__":
    main()
import pandas as pd |
import re |
import glob |
import os |
import nltk |
def process_files(quotes_file, entities_file):
    """Write ``quotes.csv`` with one row per quote and a best-guess speaker label.

    For every character id the mentions found in the quotes file and in the
    entities file are tallied as pronouns or names. The most frequent name and
    the gender implied by the most frequent pronoun form the speaker label
    ``<char_id>:<name>.M`` / ``.F`` / ``.?``. A character with exactly one
    quote is labelled "Narrator". Quotes containing no letter or digit are
    dropped. The CSV is written next to *quotes_file*.

    Args:
        quotes_file: Path of the tab-separated quotes file.
        entities_file: Path of the tab-separated entities file.
    """
    df_quotes = pd.read_csv(quotes_file, delimiter="\t")
    df_entities = pd.read_csv(entities_file, delimiter="\t")
    character_info = {}

    def is_pronoun(word):
        # Tags containing 'PRP' cover both PRP and PRP$.
        tagged_word = nltk.pos_tag([word])
        return 'PRP' in tagged_word[0][1]

    def get_gender(pronoun):
        male_pronouns = ['he', 'him', 'his']
        female_pronouns = ['she', 'her', 'hers']
        if pronoun in male_pronouns:
            return 'Male'
        elif pronoun in female_pronouns:
            return 'Female'
        return 'Unknown'

    def record_mention(info, mention):
        """Count *mention* as a pronoun (lower-cased) or as a name."""
        if not isinstance(mention, str):
            # Empty cells are parsed as NaN; there is nothing to tally.
            return
        if is_pronoun(mention):
            bucket, key = info["pronouns"], mention.lower()
        else:
            bucket, key = info["names"], mention
        bucket[key] = bucket.get(key, 0) + 1

    for _, row in df_quotes.iterrows():
        info = character_info.setdefault(
            row['char_id'], {"names": {}, "pronouns": {}, "quote_count": 0}
        )
        record_mention(info, row['mention_phrase'])
        info["quote_count"] += 1

    for _, row in df_entities.iterrows():
        info = character_info.get(row['COREF'])
        if info is not None:
            record_mention(info, row['text'])

    for char_id, info in character_info.items():
        most_likely_name = max(info["names"].items(), key=lambda x: x[1])[0] if info["names"] else "Unknown"
        most_common_pronoun = max(info["pronouns"].items(), key=lambda x: x[1])[0] if info["pronouns"] else None
        gender = get_gender(most_common_pronoun) if most_common_pronoun else 'Unknown'
        gender_suffix = ".M" if gender == 'Male' else ".F" if gender == 'Female' else ".?"
        info["formatted_speaker"] = f"{char_id}:{most_likely_name}{gender_suffix}"
        info["most_likely_name"] = most_likely_name
        info["gender"] = gender

    output_filename = os.path.join(os.path.dirname(quotes_file), "quotes.csv")
    fieldnames = ["Text", "Start Location", "End Location", "Is Quote", "Speaker"]
    # Collect plain dicts and build the frame once; concatenating a one-row
    # frame per quote copies the whole table on every iteration.
    rows = []
    for _, row in df_quotes.iterrows():
        quote = row['quote']
        if not isinstance(quote, str) or not re.search('[a-zA-Z0-9]', quote):
            print(f"Removing row with text: {quote}")
            continue
        info = character_info.get(row['char_id'])
        if info is None:
            formatted_speaker = "Unknown"
        elif info["quote_count"] == 1:
            formatted_speaker = "Narrator"
        else:
            formatted_speaker = info["formatted_speaker"]
        rows.append({
            "Text": quote,
            "Start Location": row['quote_start'],
            "End Location": row['quote_end'],
            "Is Quote": "True",
            "Speaker": formatted_speaker,
        })
    pd.DataFrame(rows, columns=fieldnames).to_csv(output_filename, index=False)
    print(f"Saved quotes.csv to {output_filename}")
def main():
    """Pair every ``.quotes`` file under Working_files with the same-named ``.entities`` file and process it."""
    quotes_files = glob.glob('Working_files/**/*.quotes', recursive=True)
    entities_files = glob.glob('Working_files/**/*.entities', recursive=True)

    def stem(path):
        # File name without directory and extension.
        return os.path.splitext(os.path.basename(path))[0]

    for q_file in quotes_files:
        wanted = stem(q_file)
        partner = next((e_file for e_file in entities_files if stem(e_file) == wanted), None)
        if partner is not None:
            process_files(q_file, partner)
    print("All processing complete!")


if __name__ == "__main__":
    main()
import pandas as pd |
import re |
import glob |
import os |
def process_files(quotes_file, tokens_file):
    """Write ``non_quotes.csv`` holding the token text found between consecutive quotes.

    For every quote, the tokens whose id lies strictly between the previous
    quote's end and this quote's start are joined, de-tokenised and stored as
    a "Narrator" row. The CSV is written next to *quotes_file*.
    """
    df_quotes = pd.read_csv(quotes_file, delimiter="\t")
    df_tokens = pd.read_csv(tokens_file, delimiter="\t", on_bad_lines='skip', quoting=3)
    token_ids = df_tokens['token_ID_within_document']
    # Applied in this order to glue tokenised contractions and punctuation back together.
    fixes = (
        (" n't", "n't"),
        (" n’", "n’"),
        (" ’", "’"),
        (" ,", ","),
        (" .", "."),
        (" n’t", "n’t"),
    )
    previous_end = 0
    collected = []
    for _, quote_row in df_quotes.iterrows():
        start_id = quote_row['quote_start']
        end_id = quote_row['quote_end']
        between = df_tokens[(token_ids > previous_end) & (token_ids < start_id)]
        words_chunk = ' '.join([str(token_row['word']) for _, token_row in between.iterrows()])
        for old, new in fixes:
            words_chunk = words_chunk.replace(old, new)
        # Remove the space in front of any remaining non-alphanumeric character.
        words_chunk = re.sub(r' (?=[^a-zA-Z0-9\s])', '', words_chunk)
        if words_chunk:
            collected.append([words_chunk, previous_end, start_id, "False", "Narrator"])
        previous_end = end_id

    nonquotes_df = pd.DataFrame(
        collected,
        columns=["Text", "Start Location", "End Location", "Is Quote", "Speaker"],
    )
    output_filename = os.path.join(os.path.dirname(quotes_file), "non_quotes.csv")
    nonquotes_df.to_csv(output_filename, index=False)
    print(f"Saved nonquotes.csv to {output_filename}")
def main():
    """Pair every ``.quotes`` file under Working_files with the same-named ``.tokens`` file and process it."""
    quotes_files = glob.glob('Working_files/**/*.quotes', recursive=True)
    tokens_files = glob.glob('Working_files/**/*.tokens', recursive=True)

    def stem(path):
        # File name without directory and extension.
        return os.path.splitext(os.path.basename(path))[0]

    for q_file in quotes_files:
        wanted = stem(q_file)
        partner = next((t_file for t_file in tokens_files if stem(t_file) == wanted), None)
        if partner is not None:
            process_files(q_file, partner)
    print("All processing complete!")


if __name__ == "__main__":
    main()
import pandas as pd |
import numpy as np |
# Merge the quote rows and the narration rows into one table, ordered by
# position in the book, and save it as book.csv.
quotes_df = pd.read_csv("Working_files/Book/quotes.csv")
non_quotes_df = pd.read_csv("Working_files/Book/non_quotes.csv")
combined_df = pd.concat([quotes_df, non_quotes_df], ignore_index=True)
# Treat the literal string 'None' as a missing value so the dropna below removes such rows.
combined_df.replace('None', np.nan, inplace=True)
combined_df.dropna(subset=['Start Location'], inplace=True)
# Cast to int so the sort is numeric.
combined_df["Start Location"] = combined_df["Start Location"].astype(int)
sorted_df = combined_df.sort_values(by="Start Location")
sorted_df.to_csv("Working_files/Book/book.csv", index=False)
import os |
import tkinter as tk |
from tkinter import messagebox |
def is_single_line_file(filename):
    """Return True when *filename* contains at most one line (an empty file counts)."""
    with open(filename, 'r') as handle:
        line_count = len(handle.readlines())
    return not line_count > 1
def copy_if_single_line(source_file, destination_file):
    """Overwrite *destination_file* with *source_file* when the destination has at most one line.

    Returns a message describing what happened: the source is missing, the
    destination was replaced, or the destination was left untouched.
    """
    if not os.path.isfile(source_file):
        return f"The source file '{source_file}' does not exist."
    if not is_single_line_file(destination_file):
        return f"File '{destination_file}' had more than one line, and no action was taken."

    with open(source_file, 'r') as source:
        content = source.read()
    with open(destination_file, 'w') as dest:
        dest.write(content)
    print("Notification:")
    print("The 'book.csv' file was found to be empty, so all lines in the book will be said by the narrator.")
    return f"File '{destination_file}' had only one line or was empty and has been filled with the contents of '{source_file}'."
# If book.csv has at most one line, replace it with the sentence-level
# Other_book.csv, then report what was done.
source_file = 'Working_files/Book/Other_book.csv'
destination_file = 'Working_files/Book/book.csv'
result = copy_if_single_line(source_file, destination_file)
print(result)
import pandas as pd |
import os |
import re |
def process_text(text):
    """Undo tokenisation artefacts in *text* and return the cleaned string.

    The space in front of any non-alphanumeric character is removed first;
    then split contractions are re-joined, square brackets become
    parentheses and a fixed dash separator is dropped.
    """
    cleaned = re.sub(r' (?=[^a-zA-Z0-9\s])', '', text)
    substitutions = (
        (" n’t", "n’t"),
        ("[", "("),
        ("]", ")"),
        ("gon na", "gonna"),
        ("—————–", ""),
        (" n't", "n't"),
    )
    for old, new in substitutions:
        cleaned = cleaned.replace(old, new)
    return cleaned
def process_file(filename):
    """Clean the 'Text' column of the CSV *filename* with process_text and save it in place.

    A file without a 'Text' column is reported and left untouched.
    """
    df = pd.read_csv(filename)
    if "Text" not in df.columns:
        print(f"Column 'Text' not found in {filename}")
        return
    # Cells are converted to str first, so missing values are cleaned as the text 'nan'.
    df['Text'] = df['Text'].apply(lambda cell: process_text(str(cell)))
    df.to_csv(filename, index=False)
    print(f"Processed and saved {filename}")
def main():
    """Clean the text of non_quotes.csv, quotes.csv and book.csv under Working_files/Book/."""
    folder_path = "Working_files/Book/"
    for filename in ("non_quotes.csv", "quotes.csv", "book.csv"):
        full_path = os.path.join(folder_path, filename)
        if not os.path.exists(full_path):
            print(f"File {filename} not found in {folder_path}")
            continue
        process_file(full_path)


if __name__ == "__main__":
    main()
# With Calibre available, split the book.csv rows at the 'NEWCHAPTERABC'
# marker and then drop rows whose text ended up empty.
# NOTE(review): the source's indentation was lost; confirm whether the
# remove_empty_text_rows call belongs inside this branch.
if calibre_installed():
    process_and_split_csv("Working_files/Book/book.csv", 'NEWCHAPTERABC')
    remove_empty_text_rows("Working_files/Book/book.csv")
import os |
import tkinter as tk |
from tkinter import messagebox |
def check_and_wipe_folder(directory_path):
    """Delete every ``.wav`` file directly inside *directory_path*.

    Prints a message and returns when the directory does not exist or holds
    no ``.wav`` files. A file that cannot be removed is reported and skipped.
    The confirmation was hard-wired to "yes", so the cancel branch could never
    run and has been removed.
    """
    if not os.path.exists(directory_path):
        print(f"The directory {directory_path} does not exist!")
        return
    wav_files = [f for f in os.listdir(directory_path) if f.endswith('.wav')]
    if not wav_files:
        print("No audio clips from a previous session were found.")
        return
    for filename in wav_files:
        file_path = os.path.join(directory_path, filename)
        try:
            os.remove(file_path)
            print(f"Deleted: {file_path}")
        except Exception as e:
            # Best effort: keep going with the remaining clips.
            print(f"Failed to delete {file_path}. Reason: {e}")
# Remove .wav clips left in the output folder before new audio is generated.
check_and_wipe_folder("Working_files/generated_audio_clips/")
from TTS.api import TTS |
import tkinter as tk |
from tkinter import ttk, scrolledtext, messagebox, simpledialog, filedialog |
import threading |
import pandas as pd |
import random |
import os |
import time |
import os |
import pandas as pd |
import random |
import shutil |
import torch |
import torchaudio |
import time |
import pygame |
import nltk |
from nltk.tokenize import sent_tokenize |
from TTS.tts.configs.xtts_config import XttsConfig |
from TTS.tts.models.xtts import Xtts |
# Sentence tokenizer data. The second, quiet download of the same package that
# used to follow was redundant and has been dropped.
nltk.download('punkt')
demo_text = "Imagine a world where endless possibilities await around every corner."
csv_file="Working_files/Book/book.csv"
data = pd.read_csv(csv_file)
# Every entry of the voices folder is a voice actor, except the example folder.
voice_actors_folder ="tortoise/voices/"
voice_actors = [va for va in os.listdir(voice_actors_folder) if va != "cond_latent_example"]
# The gender of a voice actor is encoded as a ".M" / ".F" name suffix.
male_voice_actors = [va for va in voice_actors if va.endswith(".M")]
female_voice_actors = [va for va in voice_actors if va.endswith(".F")]
character_languages = {}
# list_models() may return a plain list, or an object that exposes
# list_models() itself; both shapes are handled.
models = TTS().list_models()
if isinstance(models, list):
    print("good it's a list I can apply normal code for model list")
    selected_tts_model = models[0]
else:
    # Reuse the object obtained above instead of instantiating TTS again.
    tts_manager = models
    all_models = tts_manager.list_models()
    models = all_models
    selected_tts_model = models[0]
speaker_voice_map = {}
multi_voice_model1 ="tts_models/en/vctk/vits"
multi_voice_model2 ="tts_models/en/vctk/fast_pitch"
multi_voice_model3 ="tts_models/ca/custom/vits"
multi_voice_model_voice_list1 = []
multi_voice_model_voice_list2 = []
multi_voice_model_voice_list3 = []
voice_comboboxes = {}
fast_voice_clone_models = [model for model in models if "multi-dataset" not in model]
# Speaker id associated with each of the three multi-voice models; None for any other model.
fast_voice_clone_models_dict = {
    model: "p363" if model == multi_voice_model1 else
    "VCTK_p226" if model == multi_voice_model2 else
    "pep" if model == multi_voice_model3 else
    None
    for model in fast_voice_clone_models
}
def on_silence_duration_change(*args):
    """
    Update the SILENCE_DURATION_MS based on the entry value.

    The value of ``silence_duration_var`` must parse as a non-negative
    integer; otherwise an error dialog is shown and the setting is left
    unchanged. Positional arguments are accepted and ignored.
    """
    # Without this declaration the assignment below only created a local
    # variable and the module-level setting was never updated.
    global SILENCE_DURATION_MS
    try:
        new_duration = int(silence_duration_var.get())
        if new_duration >= 0:
            SILENCE_DURATION_MS = new_duration
        else:
            raise ValueError
    except ValueError:
        messagebox.showerror("Invalid Input", "Please enter a valid non-negative integer.")
def validate_integer(P):
    """
    Accept an empty entry or one made only of digits.

    Returns True for acceptable input; otherwise shows an error dialog and
    returns False.
    """
    if P == "" or P.isdigit():
        return True
    messagebox.showerror("Invalid Input", "Please enter a valid integer.")
    return False
def update_silence_duration():
    """
    Update the SILENCE_DURATION_MS based on the entry value.

    Shows an error dialog and leaves the setting unchanged when the value of
    ``silence_duration_var`` is not an integer.
    """
    # Without this declaration the assignment below only created a local
    # variable and the module-level setting was never updated.
    global SILENCE_DURATION_MS
    try:
        SILENCE_DURATION_MS = int(silence_duration_var.get())
    except ValueError:
        messagebox.showerror("Invalid Input", "Please enter a valid integer.")
def add_languages_to_csv():
    """Add a 'language' column to book.csv (unless it already exists) and save the file.

    Each speaker's language is looked up in ``character_languages``; speakers
    without an entry get 'en'.
    """
    book_path = 'Working_files/Book/book.csv'
    df = pd.read_csv(book_path)
    if 'language' not in df.columns:
        languages = [character_languages.get(speaker, 'en') for speaker in df['Speaker']]
        df['language'] = pd.Series(languages, index=df.index, dtype=object)
    df.to_csv('Working_files/Book/book.csv', index=False)
    print("Added language data to the CSV file.")
def add_voice_actors_to_csv():
    """Add a 'voice_actor' column to the book CSV (unless it already exists) and save the file.

    The column is the 'Speaker' column mapped through ``speaker_voice_map``.
    """
    df = pd.read_csv(csv_file)
    already_present = 'voice_actor' in df.columns
    if not already_present:
        df['voice_actor'] = df['Speaker'].map(speaker_voice_map)
    df.to_csv(csv_file, index=False)
    print(f"Added voice actor data to {csv_file}")
def get_random_voice_for_speaker(speaker):
    """Pick a random voice actor, preferring one whose gender suffix matches *speaker*.

    Speakers ending in ".M" / ".F" draw from the male / female voice actors
    when that list is non-empty; everyone else draws from all voice actors.
    """
    pool = voice_actors
    if speaker.endswith(".M"):
        pool = male_voice_actors or voice_actors
    elif speaker.endswith(".F"):
        pool = female_voice_actors or voice_actors
    return random.choice(pool or voice_actors)
def get_random_voice_for_speaker_fast(speaker):
    """Pick a random speaker id for the fast multi-speaker model.

    Speakers ending in ".M" get a male id and speakers ending in ".F" a
    female id. Every other speaker (".?" or no suffix, such as "Narrator")
    gets an id from either set. Previously a speaker without a suffix fell
    back to the folder-based ``voice_actors`` list, which does not hold ids
    of this form.

    Args:
        speaker: Speaker label from the book CSV.

    Returns:
        A speaker id string such as "p226".
    """
    male_voice_actors = {"p226", "p228","p229","p230","p231","p232","p233","p234","p236","p238","p239","p241","p251","p252","p253","p254","p255","p256","p258","p262","p264","p265","p266","p267","p269","p272","p279","p281","p282","p285","p286","p287","p292","p298","p299","p301","p302","p307","p312","p313","p317","p318","p326","p340"}
    female_voice_actors = {"p225","p227","p237","p240","p243","p244","p245","p246","p247","p248","p249","p250","p257","p259","p260","p261","p263","p268","p270","p271","p273","p274","p275","p276","p277","p280","p283","p284","p288","p293","p294","p295","p297","p300","p303","p304","p305","p306","p308","p310","p311","p314","p316","p323","p329","p341","p343","p345","p347","p351","p360","p361","p362","p363","p364","p374"}
    if speaker.endswith(".M"):
        selected_voice_actors = male_voice_actors
    elif speaker.endswith(".F"):
        selected_voice_actors = female_voice_actors
    else:
        selected_voice_actors = male_voice_actors.union(female_voice_actors)
    return random.choice(list(selected_voice_actors))
def ensure_output_folder():
    """Make sure ``Working_files/generated_audio_clips`` exists.

    Missing parent folders are created as well, and an already existing
    folder is not an error.
    """
    os.makedirs("Working_files/generated_audio_clips", exist_ok=True)
def ensure_temp_folder():
    """Make sure ``Working_files/temp`` exists.

    Missing parent folders are created as well, and an already existing
    folder is not an error.
    """
    os.makedirs("Working_files/temp", exist_ok=True)
import random |
import time |
def select_voices():
    """Assign a random voice actor to every speaker in the book and print the result.

    The assignments replace the module-level ``speaker_voice_map``. The
    interactive review step is switched off by the constant ``change = "no"``,
    so the current selections are printed once and kept.
    """
    global speaker_voice_map
    random.seed(int(time.time()))
    ensure_output_folder()
    total_rows = len(data)  # not used afterwards
    assignments = {
        speaker: get_random_voice_for_speaker(speaker)
        for speaker in data['Speaker'].unique()
    }
    speaker_voice_map = assignments

    def review_and_modify_speaker_voices():
        while True:
            print("\nCurrent voice selections:")
            for number, speaker in enumerate(speaker_voice_map, start=1):
                print(f"{number}. {speaker}: {speaker_voice_map[speaker]}")
            change = "no"
            if change != 'yes':
                break
            try:
                selection = int(input("Enter the number of the speaker to change the voice for: ")) - 1
                if not 0 <= selection < len(speaker_voice_map):
                    raise ValueError("Selection out of range.")
                selected_speaker = list(speaker_voice_map)[selection]
            except ValueError as e:
                print(f"Invalid input: {e}")
                continue
            print(f"Available voices for {selected_speaker}:")
            # Offer five random candidates (duplicates are possible).
            available_voices = []
            for _ in range(5):
                available_voices.append(get_random_voice_for_speaker(selected_speaker))
            for idx, voice in enumerate(available_voices, start=1):
                print(f"{idx}. {voice}")
            try:
                new_voice_selection = int(input("Select the new voice by number: ")) - 1
                if not 0 <= new_voice_selection < len(available_voices):
                    raise ValueError("Selection out of range.")
                chosen_voice = available_voices[new_voice_selection]
                speaker_voice_map[selected_speaker] = chosen_voice
                print(f"Voice for {selected_speaker} changed to {chosen_voice}")
            except ValueError as e:
                print(f"Invalid input: {e}")

    review_and_modify_speaker_voices()
    print("Final voice assignments have been set.")
def select_voices_fast():
    """Assign a random fast-model speaker id to every speaker in the book.

    ``speaker_voice_map`` is updated in place and each assignment is printed.
    For a speaker that has a combobox in ``voice_comboboxes``, a freshly drawn
    random voice is shown in it.
    """
    random.seed(int(time.time()))
    ensure_output_folder()
    total_rows = len(data)  # not used afterwards
    speaker_voice_map.update(
        (speaker, get_random_voice_for_speaker_fast(speaker))
        for speaker in data['Speaker'].unique()
    )
    for speaker in speaker_voice_map:
        print(f"Selected voice for {speaker}: {speaker_voice_map[speaker]}")
        if speaker not in voice_comboboxes:
            continue
        random_voice = get_random_voice_for_speaker_fast(speaker)
        voice_comboboxes[speaker].set(random_voice)
    print("Voices have been selected randomly.")
# Assign a random voice actor to every speaker as soon as this part of the script runs.
select_voices()
class Delimiter:
    """Value holder that calls registered callbacks each time the value is set."""

    def __init__(self, value):
        self._value = value
        self._callbacks = []

    def get(self):
        """Return the current value."""
        return self._value

    def set(self, new_value):
        """Store *new_value*, then run the callbacks."""
        self._value = new_value
        self._run_callbacks()

    def _run_callbacks(self):
        # Callbacks are invoked without arguments, in registration order.
        for notify in self._callbacks:
            notify()

    def trace_add(self, mode, callback):
        """Register *callback*; only the "write" mode is recorded, others are ignored."""
        if mode != "write":
            return
        self._callbacks.append(callback)
def update_chapter_keyword():
    """Print the current value of ``chapter_delimiter_var``."""
    current = chapter_delimiter_var.get()
    print("Chapter delimiter updated to:", current)
# The marker inserted between chapters when Calibre is available; plain
# "CHAPTER" otherwise.
if calibre_installed():
    chapter_delimiter_var = Delimiter("NEWCHAPTERABC")
else:
    chapter_delimiter_var = Delimiter("CHAPTER")
# Audio playback is optional: a failed mixer initialisation is reported and
# the script carries on.
try:
    pygame.mixer.init()
    print("mixer modual initialized successfully.")
except pygame.error as mixer_error:
    print("mixer modual initialization failed")
    # Print the error that occurred, not the exception class.
    print(mixer_error)
def update_voice_actor(speaker):
    """Store the voice chosen in *speaker*'s combobox and play one of its reference files.

    The selection is written to ``speaker_voice_map``. When the voice actor
    has reference files, a random one is played: ``.mp3`` files through the
    music channel, anything else as a Sound. A playback failure is only
    reported.
    """
    selected_voice_actor = voice_comboboxes[speaker].get()
    speaker_voice_map[speaker] = selected_voice_actor
    print(f"Updated voice for {speaker}: {selected_voice_actor}")

    reference_files = list_reference_files(selected_voice_actor)
    if not reference_files:
        return

    sample_path = random.choice(reference_files)
    try:
        # Silence whatever is still playing before starting the new sample.
        pygame.mixer.music.stop()
        pygame.mixer.stop()
        if not sample_path.endswith('.mp3'):
            pygame.mixer.Sound(sample_path).play()
        else:
            pygame.mixer.music.load(sample_path)
            pygame.mixer.music.play()
    except Exception as e:
        print(f"Could not play the audio file: {e}")
def split_long_sentence(sentence, max_length=230, max_pauses=8):
    """
    Splits a sentence into parts based on length or number of pauses without recursion.

    A pause is any of ``,``, ``;`` or ``.``. While the remaining text is longer
    than ``max_length`` or holds more than ``max_pauses`` pauses, a part is cut
    off right after the latest pause that keeps the part within both limits.
    If the first ``max_length`` characters contain no pause at all, the text is
    cut at ``max_length``. Parts are whitespace-stripped.

    :param sentence: The sentence to split.
    :param max_length: Maximum allowed length of a sentence.
    :param max_pauses: Maximum allowed number of pauses in a sentence.
    :return: A list of sentence parts that meet the criteria. An empty input
        yields ``[""]``; otherwise no empty trailing part is produced.
    :raises ValueError: If ``max_length`` is below 1 or ``max_pauses`` is
        negative (either would keep the loop from making progress).
    """
    if max_length < 1:
        raise ValueError("max_length must be at least 1")
    if max_pauses < 0:
        raise ValueError("max_pauses must not be negative")

    pause_chars = ',;.'
    # A part that ends on a pause always contains at least one pause.
    pauses_per_part = max(max_pauses, 1)

    def count_pauses(text):
        return sum(text.count(char) for char in pause_chars)

    parts = []
    while len(sentence) > max_length or count_pauses(sentence) > max_pauses:
        pause_positions = [i for i, char in enumerate(sentence[:max_length]) if char in pause_chars]
        # Keep only the first allowed pauses so the part honours max_pauses too.
        candidates = pause_positions[:pauses_per_part]
        split_at = candidates[-1] + 1 if candidates else max_length
        parts.append(sentence[:split_at].strip())
        sentence = sentence[split_at:].strip()

    # Skip the empty remainder left when the last cut consumed the whole text.
    if sentence or not parts:
        parts.append(sentence)
    return parts
def combine_wav_files(input_directory, output_directory, file_name):
    """Concatenate every ``.wav`` in *input_directory* into a single file.

    Files are ordered by the integer formed from the digits in their file
    name (names without digits sort first), loaded with ``torchaudio`` and
    joined along dimension 1. The result is saved as
    ``output_directory/file_name`` using the sample rate of the last file
    loaded. ``output_directory`` is created when missing.

    When the folder holds no ``.wav`` files a message is printed and nothing
    is written, instead of failing inside ``torch.cat``.
    """
    def clip_number(wav_name):
        # Use the file name only, so digits in the directory path cannot leak in.
        digits = ''.join(filter(str.isdigit, wav_name))
        return int(digits) if digits else 0

    wav_names = sorted(
        (name for name in os.listdir(input_directory) if name.endswith(".wav")),
        key=clip_number,
    )
    if not wav_names:
        print(f"No .wav files found in {input_directory}; nothing to combine.")
        return

    audio_tensors = []
    sample_rate = None
    for wav_name in wav_names:
        waveform, sample_rate = torchaudio.load(os.path.join(input_directory, wav_name))
        audio_tensors.append(waveform)

    combined_audio = torch.cat(audio_tensors, dim=1)
    os.makedirs(output_directory, exist_ok=True)
    output_file_path = os.path.join(output_directory, file_name)
    torchaudio.save(output_file_path, combined_audio, sample_rate)
    print(f"Combined audio saved to {output_file_path}")
def wipe_folder(directory_path):
    """Delete every regular file directly inside *directory_path*.

    Sub-directories are left alone. A missing directory is reported and
    skipped. Each deletion is reported; a failed deletion is printed and the
    loop moves on to the next file.
    """
    if not os.path.exists(directory_path):
        print(f"The directory {directory_path} does not exist!")
        return

    entries = (os.path.join(directory_path, name) for name in os.listdir(directory_path))
    for file_path in filter(os.path.isfile, entries):
        try:
            os.remove(file_path)
        except Exception as e:
            print(f"Failed to delete {file_path}. Reason: {e}")
        else:
            print(f"Deleted: {file_path}")
# Ask the TTS package for its model catalogue once.
tts_models = TTS().list_models()
if isinstance(tts_models, list):
    print("good it's a list I can apply normal code for model list")
    # Index the list that was just checked; `models` is not defined here.
    selected_tts_model = tts_models[0]
else:
    # A non-list result is treated as a manager-like object that exposes its
    # own list_models(); reuse it instead of instantiating TTS a second time.
    tts_manager = tts_models
    all_models = tts_manager.list_models()
    tts_models = all_models
def update_tts_model(event):
    """Copy the model chosen in ``tts_model_combobox`` into ``selected_tts_model``.

    *event* is accepted so the function can be used as an event handler; it is
    not read.
    """
    global selected_tts_model
    chosen_model = tts_model_combobox.get()
    selected_tts_model = chosen_model
    print(f"Selected TTS model: {chosen_model}")
# Models offered as voice-cloning engines: every "multi-dataset" model plus
# StyleTTS2, minus the three multi-voice models handled separately.
models_to_remove = [multi_voice_model1, multi_voice_model2, multi_voice_model3]
multilingual_tts_models = [name for name in tts_models if "multi-dataset" in name]
multilingual_tts_models += ['StyleTTS2']
multilingual_tts_models = [name for name in multilingual_tts_models if name not in models_to_remove]
# NOTE(review): a module-level `global` statement has no effect; kept as found.
global select_voices_button
def update_voice_comboboxes():
    """Refresh the voice lists and repopulate every speaker combobox.

    Rebuilds the global ``voice_actors`` list from the entries of
    ``voice_actors_folder`` (skipping ``cond_latent_example``) and derives
    ``male_voice_actors`` / ``female_voice_actors`` from the ``.M`` / ``.F``
    suffixes. Any multi-voice speaker list that is still empty is filled from
    ``TTS(model).speakers``. Each combobox then receives the voice actors, the
    non "multi-dataset" models (without the three multi-voice models) and the
    multi-voice speakers, is set to the speaker's entry in
    ``speaker_voice_map`` and is widened to the longest value.
    """
    global multi_voice_model_voice_list1
    global multi_voice_model_voice_list2
    global multi_voice_model_voice_list3
    global voice_actors
    global female_voice_actors
    global male_voice_actors

    voice_actors = [va for va in os.listdir(voice_actors_folder) if va != "cond_latent_example"]
    male_voice_actors = [va for va in voice_actors if va.endswith(".M")]
    female_voice_actors = [va for va in voice_actors if va.endswith(".F")]

    if not multi_voice_model_voice_list1:
        print(f"{multi_voice_model_voice_list1} is empty populating it...")
        multi_voice_model_voice_list1 = TTS(multi_voice_model1).speakers
    if not multi_voice_model_voice_list2:
        print(f"{multi_voice_model_voice_list2} is empty populating it...")
        multi_voice_model_voice_list2 = TTS(multi_voice_model2).speakers
    if not multi_voice_model_voice_list3:
        print(f"{multi_voice_model_voice_list3} is empty populating it...")
        multi_voice_model_voice_list3 = TTS(multi_voice_model3).speakers

    # Filter the multi-voice models out rather than calling list.remove(),
    # which raises ValueError when a model is not in the catalogue.
    multi_voice_models = (multi_voice_model1, multi_voice_model2, multi_voice_model3)
    filtered_tts_models = [
        model for model in tts_models
        if "multi-dataset" not in model and model not in multi_voice_models
    ]

    combined_values = voice_actors + filtered_tts_models
    combined_values += multi_voice_model_voice_list1 + multi_voice_model_voice_list2 + multi_voice_model_voice_list3

    # Every combobox shows the same values, so compute the width once.
    longest_string_length = max((len(str(value)) for value in combined_values), default=0)
    for speaker, combobox in voice_comboboxes.items():
        combobox['values'] = combined_values
        combobox.set(speaker_voice_map[speaker])
        combobox.config(width=longest_string_length)
# Append the non "multi-dataset" models to the engine list, then populate the
# speaker comboboxes.
filtered_tts_models = [name for name in tts_models if "multi-dataset" not in name]
multilingual_tts_models += filtered_tts_models
update_voice_comboboxes()
def create_folder_if_not_exists(folder_path):
    """Create *folder_path* (with parents) unless it already exists.

    Prints which of the two cases applied.
    """
    if os.path.exists(folder_path):
        print(f"Folder '{folder_path}' already exists.")
        return
    os.makedirs(folder_path)
    print(f"Folder '{folder_path}' created successfully.")
def _audio_files_in(folder):
    """Return the paths of the ``.wav``/``.mp3`` files directly inside *folder*."""
    return [os.path.join(folder, file) for file in os.listdir(folder) if file.endswith((".wav", ".mp3"))]


def _demo_files_for_model(model_name, demo_folder, speaker=None, force_cpu=False):
    """Return the demo clips in *demo_folder*, synthesizing one if it is empty.

    The folder is created when missing. If it holds no audio yet, *model_name*
    is loaded and ``demo_text`` is rendered to ``demo.wav`` (with *speaker*
    when given) before the folder is listed again.
    """
    create_folder_if_not_exists(demo_folder)
    reference_files = _audio_files_in(demo_folder)
    if reference_files:
        return reference_files

    if force_cpu:
        device = "cpu"
    else:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    fast_tts = TTS(model_name, progress_bar=True).to(device)
    synthesis_kwargs = {"text": demo_text, "file_path": f"{demo_folder}/demo.wav"}
    if speaker is not None:
        synthesis_kwargs["speaker"] = speaker
    fast_tts.tts_to_file(**synthesis_kwargs)
    return _audio_files_in(demo_folder)


def list_reference_files(voice_actor):
    """Return the reference audio files available for *voice_actor*.

    * A speaker of one of the three multi-voice models: the clips under
      ``tortoise/_model_demo_voices/<model>/<voice_actor>``, generating a demo
      clip first when there is none.
    * A name containing ``tts_models``: same, under
      ``tortoise/_model_demo_voices/<voice_actor>``.
    * Anything else: the ``.wav``/``.mp3`` files in the actor's folder under
      ``voice_actors_folder``.
    """
    global multi_voice_model_voice_list1
    global multi_voice_model_voice_list2
    global multi_voice_model_voice_list3

    demo_root = "tortoise/_model_demo_voices"
    # NOTE(review): the second model has always been loaded on the CPU even
    # when CUDA is available; kept as found. Confirm whether that is intended.
    multi_voice_models = (
        (multi_voice_model1, multi_voice_model_voice_list1, False),
        (multi_voice_model2, multi_voice_model_voice_list2, True),
        (multi_voice_model3, multi_voice_model_voice_list3, False),
    )
    for model_name, speakers, force_cpu in multi_voice_models:
        if voice_actor in speakers:
            return _demo_files_for_model(
                model_name,
                f"{demo_root}/{model_name}/{voice_actor}",
                speaker=voice_actor,
                force_cpu=force_cpu,
            )

    if "tts_models" in voice_actor:
        create_folder_if_not_exists(demo_root)
        return _demo_files_for_model(voice_actor, f"{demo_root}/{voice_actor}")

    single_voice_actor_folder = f"{voice_actors_folder}{voice_actor}/"
    return _audio_files_in(single_voice_actor_folder)
# Display name -> language code offered to the user.
languages = {
    'English': 'en',
    'Spanish': 'es',
    'French': 'fr',
    'German': 'de',
    'Italian': 'it',
    'Portuguese': 'pt',
    'Polish': 'pl',
    'Turkish': 'tr',
    'Russian': 'ru',
    'Dutch': 'nl',
    'Czech': 'cs',
    'Arabic': 'ar',
    'Chinese': 'zh-cn',
    'Japanese': 'ja',
    'Hungarian': 'hu',
    'Korean': 'ko',
}

# Module-level state shared by the generation functions.
current_language = 'en'  # language code used when a speaker has none of its own
current_model = ""       # name of the model most recently loaded
tts = None               # model object; stays None until something loads one
STTS = None
def generate_file_ids(csv_file, chapter_delimiter):
    """Write an ``audio_id`` column of the form ``audio_<row>_<chapter>``.

    The CSV is read, every row's ``Text`` is printed, and the chapter counter
    is incremented for each row whose text contains *chapter_delimiter*
    (case-sensitive). The file is rewritten in place without the index.

    Rows whose ``Text`` is not a string (e.g. an empty cell read back as NaN)
    never start a chapter; previously they raised ``TypeError``.
    """
    data = pd.read_csv(csv_file)

    audio_ids = []
    chapter_num = 0
    for index, text in data['Text'].items():
        print(f"{text}")
        if isinstance(text, str) and chapter_delimiter in text:
            chapter_num += 1
        audio_ids.append(f"audio_{index}_{chapter_num}")

    # Assign the whole column at once so it is string-typed regardless of
    # what an existing `audio_id` column held.
    data['audio_id'] = audio_ids
    data.to_csv(csv_file, index=False)
    print(f"'audio_id' column has been updated in {csv_file}")
generate_file_ids(csv_file, chapter_delimiter_var.get()) |
import os |
import torch |
import torchaudio |
from TTS.tts.configs.xtts_config import XttsConfig |
from TTS.tts.models.xtts import Xtts |
import time |
import sys |
def install(package):
    """Install *package* with pip, run through the current interpreter.

    Raises ``subprocess.CalledProcessError`` when pip exits with a non-zero
    status.
    """
    pip_command = [sys.executable, "-m", "pip", "install", package]
    subprocess.check_call(pip_command)
def fineTune_audio_generate(text, file_path, speaker_wav, language, voice_actor):
    """Synthesize *text* with the fine-tuned XTTS model of *voice_actor*.

    The model files are read from ``tortoise/voices/<voice_actor>/model``.
    The model is (re)loaded into the global ``tts`` only when the global
    ``current_model`` names a different voice actor. Speaker latents are
    computed from *speaker_wav*, inference runs for *language*, and the
    waveform is saved to *file_path* at 24000 Hz. The elapsed time is printed.
    """
    global current_model
    global tts

    started_at = time.time()
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model_dir = f"tortoise/voices/{voice_actor}/model"

    if current_model == voice_actor:
        print(f"found fine tuned model for voice actor: {voice_actor} but {voice_actor} model is already loaded")
    else:
        print(f"found fine tuned for voice actor: {voice_actor}: loading custom model...")
        config = XttsConfig()
        config.load_json(f"{model_dir}/config.json")
        # `tts` is declared global, so it never shows up in locals(); a fresh
        # model is therefore always built whenever the voice actor changes.
        tts = Xtts.init_from_config(config)
        tts.load_checkpoint(
            config,
            checkpoint_path=f"{model_dir}/model.pth",
            vocab_path=f"{model_dir}/vocab.json_",
            use_deepspeed=False,
        )
        if device == "cuda":
            tts.cuda()
        else:
            tts.cpu()
        current_model = voice_actor

    print("Computing speaker latents...")
    gpt_cond_latent, speaker_embedding = tts.get_conditioning_latents(audio_path=[speaker_wav])

    print("Inference...")
    out = tts.inference(
        text,
        language,
        gpt_cond_latent,
        speaker_embedding,
        temperature=0.7,
    )
    torchaudio.save(file_path, torch.tensor(out["wav"]).unsqueeze(0), 24000)

    elapsed_time = time.time() - started_at
    print(f"Time taken for execution: {elapsed_time:.2f} seconds")
def select_tts_model():
    """Return the name of the TTS model to use for voice cloning.

    The default is ``"StyleTTS2"``. The confirmation answer is currently
    hard-wired to ``"yes"``, so the default is returned without prompting;
    the interactive selection below only runs if that answer is changed.
    """
    models = TTS().list_models()
    if not isinstance(models, list):
        # Same handling as the module-level model listing: a non-list result
        # is a manager-like object that exposes its own list_models().
        models = list(models.list_models())
    all_models = models + ["StyleTTS2"]
    current_model = "StyleTTS2"

    while True:
        print(f"The TTS model currently selected is {current_model}.")
        response = "yes"
        if response == 'yes':
            return current_model
        if response != 'no':
            print("Please answer 'yes' or 'no'.")
            continue

        print("Available models:")
        for model in all_models:
            print(model)
        while True:
            selected_model = input("Please type the name of one of the above models: ").strip()
            if selected_model in all_models:
                current_model = selected_model
                break
            print("Invalid model. Please select a model from the list.")
import os |
import shutil |
from tkinter import filedialog |
def clone_new_voice():
    """Create a voice-actor folder under ``tortoise/voices`` from a sample file.

    The confirmation answer is currently hard-wired to ``"no"``, so the
    function returns immediately. With ``"yes"`` it asks for a name, a gender
    and a sample file, creates ``tortoise/voices/<name>.<gender>`` and copies
    the sample into it, repeating for as long as the user answers ``yes``.
    """
    while True:
        confirm = "no"
        if confirm == 'no':
            break
        if confirm != 'yes':
            print("Please answer 'yes' or 'no'.")
            continue

        voice_actor_name = input("Enter the name of the new voice actor: ")
        voice_actor_gender = input("Enter the gender of the new voice actor (M/F/?): ")
        new_voice_path = f"tortoise/voices/{voice_actor_name}.{voice_actor_gender}"

        if os.path.exists(new_voice_path):
            print("Voice actor folder already exists.")
        else:
            os.makedirs(new_voice_path)
            print(f"New directory created at: {new_voice_path}")
            print("Please enter the path to the voice sample file to copy:")
            sample_file = input("Enter file path: ")
            if not os.path.exists(sample_file):
                print("The file does not exist. Please check the path and try again.")
            else:
                shutil.copy(sample_file, new_voice_path)
                print("Sample file copied successfully.")

        repeat = input("Do you want to clone another new voice? (yes/no): ").lower()
        if repeat != 'yes':
            break
def add_fine_tuned_model():
    """Copy fine-tuned XTTS model files into a voice actor's ``model`` folder.

    The confirmation answer is currently hard-wired to ``"no"``, so the
    function returns immediately. With ``"yes"`` it lists the folders under
    ``tortoise/voices/``, asks which one to use, creates its ``model``
    sub-folder if needed and copies every entry of a user-supplied source
    folder into it, repeating for as long as the user answers ``yes``.
    """
    while True:
        confirm = "no"
        if confirm == 'no':
            break
        if confirm != 'yes':
            print("Please answer 'yes' or 'no'.")
            continue

        base_directory = "tortoise/voices/"
        folders = [
            entry for entry in os.listdir(base_directory)
            if os.path.isdir(os.path.join(base_directory, entry))
        ]
        print("Select a voice actor to add a fine-tuned model to:")
        for position, entry in enumerate(folders):
            print(f"{position}: {entry}")
        selected_index = int(input("Enter the number corresponding to the voice actor: "))
        model_path = os.path.join(base_directory, folders[selected_index], "model")
        if not os.path.exists(model_path):
            os.makedirs(model_path)

        print("Please enter the path to the folder containing fine-tuned XTTS model files to copy from:")
        source_folder = input("Enter folder path: ")
        if not os.path.isdir(source_folder):
            print("The specified directory does not exist. Please check the path and try again.")
        else:
            for file in os.listdir(source_folder):
                shutil.copy2(os.path.join(source_folder, file), os.path.join(model_path, file))
            print(f"Files copied successfully to {model_path}")

        repeat = input("Do you want to add another fine-tuned model? (yes/no): ").lower()
        if repeat != 'yes':
            break
def ask_if_user_wants_to_add_fine_tuned_xtts_model_or_clone_a_voice():
    """Show the voice-management menu and act on the choice.

    The choice is currently hard-wired to ``"3"`` (exit), so the menu is
    printed once and the function returns. ``"1"`` would run
    ``clone_new_voice`` and ``"2"`` ``add_fine_tuned_model``.
    """
    menu_lines = (
        "\n1. Clone a new voice",
        "2. Add a fine-tuned XTTS model to a voice actor",
        "3. Exit",
    )
    while True:
        for line in menu_lines:
            print(line)
        choice = "3"
        if choice == '3':
            print("Exiting the program.")
            break
        if choice == '1':
            clone_new_voice()
        elif choice == '2':
            add_fine_tuned_model()
        else:
            print("Invalid choice. Please try again.")
def select_language_terminal():
    """Return the language code to generate audio in.

    The "change language" answer is currently hard-wired to ``"no"``, so
    ``"en"`` is returned after a short notice. With ``"yes"`` the user picks a
    code from a numbered list and confirms it; an unconfirmed choice falls
    back to ``"en"``.
    """
    default_language = "en"
    change_lang = "no"
    if change_lang != "yes":
        print("No language change requested. Using default English.")
        return default_language

    languages = ['en', 'es', 'fr', 'de', 'it', 'pt', 'pl', 'tr', 'ru', 'nl', 'cs', 'ar', 'zh-cn', 'hu', 'ko', 'ja', 'hi']
    print("Available languages:")
    for number, lang in enumerate(languages, start=1):
        print(f"{number}. {lang}")

    while True:
        try:
            choice = int(input("Select a language by number: "))
            language = languages[choice - 1]
        except (IndexError, ValueError):
            print("Invalid selection. Please try again.")
        else:
            break

    confirm = input(f"Confirm changing language to {language}? (yes/no): ").strip().lower()
    if confirm == "yes":
        print(f"Language set to {language}.")
        return language
    print("Language change canceled. Using default English.")
    return default_language
from tqdm import tqdm |
def generate_audio():
    """Generate one combined WAV clip per row of the global ``data`` frame.

    For every row the text is split into sentences (``sent_tokenize``) and
    then into fragments (``split_long_sentence``). Each fragment is rendered
    to ``Working_files/temp/<n>.wav`` with the engine that matches the row's
    voice actor, checked in this order:

    1. a speaker of one of the three multi-voice models,
    2. a stand-alone ``tts_models`` name that is not "multi-dataset",
    3. a voice actor with a fine-tuned model folder (only for xtts models),
    4. otherwise whatever ``select_tts_model`` returned.

    The fragments of a row are merged into
    ``Working_files/generated_audio_clips/audio_<row>_<chapter>.wav`` and the
    temp folder is emptied. The chapter counter goes up on rows containing
    the chapter marker. The total generation time is printed at the end.
    """
    global current_language
    global current_model
    global STTS
    global multi_voice_model_voice_list1
    global multi_voice_model_voice_list2
    global multi_voice_model_voice_list3

    ask_if_user_wants_to_add_fine_tuned_xtts_model_or_clone_a_voice()
    selected_tts_model = select_tts_model()

    # The narrator-only prompt is currently answered with a fixed "no".
    use_narrator_voice = "no"
    while use_narrator_voice not in ['yes', 'no']:
        print("Invalid input. Please type 'yes' or 'no'.")
        use_narrator_voice = input("Do you want to generate all audio with the Narrator voice? (yes/no): ").strip().lower()
    use_narrator_voice = use_narrator_voice == 'yes'

    current_language = select_language_terminal()
    start_timez = time.time()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    ensure_temp_folder()
    random.seed(int(time.time()))
    ensure_output_folder()
    chapter_num = 0
    add_voice_actors_to_csv()
    add_languages_to_csv()

    temp_input_directory = "Working_files/temp"
    output_directory = "Working_files/generated_audio_clips"
    # Call the check. Testing the bare function object was always truthy, so
    # the CHAPTER_KEYWORD branch below could never run.
    calibre_available = calibre_installed()
    fast_voice_models = (
        (multi_voice_model1, multi_voice_model_voice_list1),
        (multi_voice_model2, multi_voice_model_voice_list2),
        (multi_voice_model3, multi_voice_model_voice_list3),
    )
    tts = None       # engine for selected_tts_model, loaded on first use
    fast_tts = None  # engine for the per-character models, reloaded on change

    for index, row in tqdm(data.iterrows(), total=data.shape[0], desc="Generating AudioBook"):
        speaker = row['Speaker']
        text = row['Text']
        speaker_language = character_languages.get(speaker, current_language)
        # Only multilingual engines are handed a language code.
        language_code = speaker_language if 'multilingual' in selected_tts_model else None

        if calibre_available:
            if "NEWCHAPTERABC" in text:
                chapter_num += 1
                print(f"chapter num: {chapter_num}")
                text = text.replace("NEWCHAPTERABC", "")
        elif CHAPTER_KEYWORD in text.upper():
            chapter_num += 1
            print(f"chapter num: {chapter_num}")

        if use_narrator_voice:
            print(f"All audio is being generated with the Narrator voice.")
            voice_actor = speaker_voice_map.get("Narrator")
        else:
            voice_actor = speaker_voice_map[speaker]

        temp_count = 0
        for sentence in sent_tokenize(text):
            for fragment in split_long_sentence(sentence):
                print(f"Voice actor: {voice_actor}, {current_language}")
                temp_count += 1
                fragment_path = f"Working_files/temp/{temp_count}.wav"
                fast_model = next(
                    (model for model, voices in fast_voice_models if voice_actor in voices),
                    None,
                )

                if fast_model is not None:
                    print(f"{voice_actor} is a fast model voice: {fast_model}")
                    # Also reload when nothing is loaded yet: current_model is
                    # a global and may already name this model.
                    if fast_tts is None or current_model != fast_model:
                        fast_tts = TTS(fast_model, progress_bar=True).to("cpu")
                        current_model = fast_model
                        print(f"The model used in fast_tts has been changed to {current_model}")
                    fast_tts.tts_to_file(text=fragment, file_path=fragment_path, speaker=voice_actor)

                elif "tts_models" in voice_actor and "multi-dataset" not in voice_actor:
                    if fast_tts is None or current_model != voice_actor:
                        fast_tts = TTS(voice_actor, progress_bar=True).to(device)
                        current_model = voice_actor
                        print(f"The model used in fast_tts has been changed to {current_model}")
                        print(f"Model for this character has been switched to: {voice_actor} by user")
                    try:
                        fast_tts.tts_to_file(text=fragment, file_path=fragment_path)
                    except ValueError as e:
                        if str(e) == "Model is multi-lingual but no `language` is provided.":
                            print("attempting to correct....")
                            # Retry with the speaker's language; language_code
                            # is None unless the selected engine is multilingual.
                            fast_tts.tts_to_file(text=fragment, file_path=fragment_path, language=speaker_language)
                            print("Successfully Corrected!")
                        else:
                            print(f"Could not synthesize fragment with {voice_actor}: {e}")

                elif os.path.isdir(f"tortoise/voices/{voice_actor}/model") and 'xtts' in selected_tts_model:
                    speaker_wavz = list_reference_files(voice_actor)
                    fineTune_audio_generate(text=fragment, file_path=fragment_path, speaker_wav=speaker_wavz[0], language=language_code, voice_actor=voice_actor)

                elif "multilingual" in selected_tts_model and "multi-dataset" in selected_tts_model:
                    if tts is None:
                        tts = TTS(selected_tts_model, progress_bar=True).to(device)
                    try:
                        if "bark" in selected_tts_model:
                            print(f"{selected_tts_model} is bark so multilingual but has no language code")
                            tts.tts_to_file(text=fragment, file_path=fragment_path, speaker_wav=list_reference_files(voice_actor))
                        else:
                            print(f"{selected_tts_model} is multi-dataset and multilingual")
                            tts.tts_to_file(text=fragment, file_path=fragment_path, speaker_wav=list_reference_files(voice_actor), language=language_code)
                    except ValueError as e:
                        if str(e) == "Model is not multi-lingual but `language` is provided.":
                            print("Caught ValueError: Model is not multi-lingual. Ignoring the language parameter.")
                            tts.tts_to_file(text=fragment, file_path=fragment_path, speaker_wav=list_reference_files(voice_actor))
                        else:
                            print(f"Could not synthesize fragment with {selected_tts_model}: {e}")

                elif "multilingual" in selected_tts_model:
                    print(f"{selected_tts_model} is multilingual")
                    if tts is None:
                        tts = TTS(selected_tts_model, progress_bar=True).to(device)
                    tts.tts_to_file(text=fragment, file_path=fragment_path, language=language_code)

                elif "multi-dataset" in selected_tts_model:
                    print(f"{selected_tts_model} is multi-dataset")
                    if tts is None:
                        tts = TTS(selected_tts_model, progress_bar=True).to(device)
                    tts.tts_to_file(text=fragment, file_path=fragment_path)

                elif 'StyleTTS2' in selected_tts_model:
                    # NOTE(review): nothing is synthesized in this branch, so
                    # no temp file exists for the fragment. Confirm where the
                    # StyleTTS2 rendering is supposed to happen.
                    print(f'{selected_tts_model} model is selected for voice cloning')

                elif selected_tts_model in fast_voice_clone_models_dict:
                    print(f"Using voice conversion voice cloning method and the selected model for this is {selected_tts_model}")
                    if tts is None:
                        tts = TTS(selected_tts_model).to("cpu")
                    current_model = selected_tts_model
                    try:
                        tts.tts_with_vc_to_file(
                            fragment,
                            speaker_wav=list_reference_files(voice_actor)[0],
                            file_path=fragment_path,
                            speaker=fast_voice_clone_models_dict[selected_tts_model]
                        )
                    except Exception as e:
                        print(f"An error occurred but was ignored: {e}")
                        print("But continuing anyway but you should probs look at that error: its probably that the input for the tts model is too short so idk find a way to fix it if it runs into an issue like this:")

                else:
                    print(f"{selected_tts_model} is neither multi-dataset nor multilingual")
                    if tts is None:
                        tts = TTS(selected_tts_model, progress_bar=True).to(device)
                    tts.tts_to_file(text=fragment, file_path=fragment_path)

        combine_wav_files(temp_input_directory, output_directory, f"audio_{index}_{chapter_num}.wav")
        wipe_folder("Working_files/temp")

    end_timez = time.time()
    durationz = end_timez - start_timez
    print("GENERATION TIME:" + str(durationz))
from functools import partial |
def format_time(seconds):
    """
    Formats time in seconds to a more readable string with minutes, hours, days, and years if applicable.

    The value is rounded to whole seconds before it is broken down, so a unit
    never overflows (e.g. 59.6 becomes "1 min 0 sec", not "60 sec"). Negative
    values are treated as zero. A year counts as 365 days. Units that are zero
    are omitted, except seconds, which are always shown.
    """
    total_seconds = max(int(round(seconds)), 0)
    minutes, secs = divmod(total_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    days, hours = divmod(hours, 24)
    years, days = divmod(days, 365)

    pieces = []
    for amount, unit in ((years, "year"), (days, "day"), (hours, "hour")):
        if amount > 0:
            pieces.append(f"{amount} {unit}{'s' if amount > 1 else ''}")
    if minutes > 0:
        pieces.append(f"{minutes} min")
    pieces.append(f"{secs} sec")
    return " ".join(pieces)
def update_progress(index, total, row_text):
    """Update the progress label after row *index* of *total* was processed.

    Adds ``len(row_text)`` to the running character count and estimates the
    remaining time as (average characters per row x rows left) x (elapsed
    time per character), measured from the module-level ``start_time``.
    Until at least one character has been processed the label shows
    "Calculating..." instead of an estimate.
    """
    global total_chars_processed, processed_rows_count

    elapsed_time = time.time() - start_time
    total_chars_processed += len(row_text)
    processed_rows_count += 1
    progress = (index + 1) / total * 100

    # Guard on characters, not rows: empty rows leave the character count at
    # zero and the per-character average would divide by zero.
    if total_chars_processed > 0:
        average_chars_per_row = total_chars_processed / processed_rows_count
        estimated_chars_remaining = average_chars_per_row * (total - processed_rows_count)
        average_time_per_char = elapsed_time / total_chars_processed
        estimated_time_remaining = average_time_per_char * estimated_chars_remaining
        remaining_time_string = format_time(estimated_time_remaining)
    else:
        remaining_time_string = "Calculating..."

    progress_label.config(text=f"{progress:.2f}% done ({index+1}/{total} rows) - {remaining_time_string}")
    root.update_idletasks()
start_time = time.time() |
total_chars_processed = 0 |
processed_rows_count = 0 |
def create_scrollable_frame(parent, height):
    """Build a vertically scrollable area inside *parent* and return its frame.

    A canvas of the given *height* is packed on the left and a vertical
    scrollbar on the right. The returned ``ttk.Frame`` is embedded in the
    canvas, and the canvas scroll region is refreshed whenever that frame is
    reconfigured.
    """
    canvas = tk.Canvas(parent, height=height)
    scrollbar = ttk.Scrollbar(parent, orient="vertical", command=canvas.yview)
    inner_frame = ttk.Frame(canvas)

    def _refresh_scroll_region(_event):
        canvas.configure(scrollregion=canvas.bbox("all"))

    canvas.configure(yscrollcommand=scrollbar.set)
    canvas.create_window((0, 0), window=inner_frame, anchor="nw")
    inner_frame.bind("<Configure>", _refresh_scroll_region)

    canvas.pack(side="left", fill="both", expand=True)
    scrollbar.pack(side="right", fill="y")
    return inner_frame
def update_chapter_keyword(*args):
    """Copy the current chapter delimiter into the global ``CHAPTER_KEYWORD``.

    Accepts and ignores any positional arguments so it can be registered as a
    trace callback.
    """
    # Without this declaration the assignment only created a local variable
    # and the module-level keyword was never updated.
    global CHAPTER_KEYWORD
    CHAPTER_KEYWORD = chapter_delimiter_var.get()
chapter_delimiter_var.trace_add("write", update_chapter_keyword) |
generate_audio() |
import os |
import shutil |
def wipe_folder(folder_path):
    """Remove *folder_path* and everything in it, if it is an existing directory.

    Prints what was done. Anything that is missing or not a directory is left
    untouched.
    """
    is_existing_directory = os.path.exists(folder_path) and os.path.isdir(folder_path)
    if not is_existing_directory:
        print(f"Folder '{folder_path}' does not exist. No action taken.")
        return
    print(f"Folder '{folder_path}' found. Proceeding to wipe...")
    shutil.rmtree(folder_path)
    print(f"Folder '{folder_path}' has been wiped.")
folder_to_wipe = "Final_combined_output_audio" |
wipe_folder(folder_to_wipe) |
import os |
import pandas as pd |
import torch |
import torchaudio |
import pygame |
# Colour palette and per-speaker colour assignments.
colors = ['#FFB6C1', '#ADD8E6', '#FFDAB9', '#98FB98', '#D8BFD8']
speaker_colors = {}
currently_playing = None

# Where the per-row clips are read from and where chapter files are written.
INPUT_FOLDER = "Working_files/generated_audio_clips"
OUTPUT_FOLDER = "Final_combined_output_audio"

# Start the pygame mixer; a failure is reported but does not stop the script.
try:
    pygame.mixer.init()
    print("mixer modual initialized successfully.")
except pygame.error as mixer_error:
    print("mixer modual initialization failed")
    # Print the error that was raised, not the exception class itself.
    print(mixer_error)
def combine_audio_files(silence_duration_ms):
    """Merge the per-row clips in ``INPUT_FOLDER`` into one WAV per chapter.

    Clip names are expected to look like ``audio_<row>_<chapter>.wav``. Clips
    are grouped by chapter, ordered by row within the chapter, and joined
    with *silence_duration_ms* milliseconds of silence after each clip. Every
    chapter is saved as ``OUTPUT_FOLDER/chapter_<chapter>.wav`` using the
    sample rate of the chapter's last clip.
    """
    folder_path = os.path.join(os.getcwd(), INPUT_FOLDER)
    output_folder = os.path.join(os.getcwd(), OUTPUT_FOLDER)

    def clip_ids(name):
        fields = name.split('_')
        return int(fields[2].split('.')[0]), int(fields[1].split('.')[0])

    clip_names = [f for f in os.listdir(folder_path) if f.startswith("audio_") and f.endswith(".wav")]
    chapter_files = {}
    for name in sorted(clip_names, key=clip_ids):
        chapter_files.setdefault(clip_ids(name)[0], []).append(name)

    for chapter_num, chapter_file_list in chapter_files.items():
        # Collect the pieces and concatenate once; re-concatenating the
        # growing tensor for every clip copies the whole chapter each time.
        segments = []
        for index, file in enumerate(chapter_file_list):
            waveform, sample_rate = torchaudio.load(os.path.join(folder_path, file))
            channels = waveform.shape[0]
            silence_tensor = torch.zeros(channels, int(silence_duration_ms * sample_rate / 1000))
            segments.extend((waveform, silence_tensor))
            print(f"Processing Chapter {chapter_num} - File {index + 1}/{len(chapter_file_list)}: {file}")
        combined_tensor = torch.cat(segments, dim=1)

        os.makedirs(output_folder, exist_ok=True)
        output_path = os.path.join(output_folder, f"chapter_{chapter_num}.wav")
        torchaudio.save(output_path, combined_tensor, sample_rate)

    print("Combining audio files complete!")
combine_audio_files(SILENCE_DURATION_MS) |
import os |
import subprocess |
from pydub import AudioSegment |
import shlex |
def sort_chapters(file):
    """Sort key: the first integer in the file name of *file*, or 0 if none.

    Only the final path component is inspected, so digits in a directory name
    (the callers pass joined paths) cannot override the chapter number.
    """
    number_match = re.search(r'\d+', os.path.basename(file))
    return int(number_match.group()) if number_match else 0
def extract_ebook_metadata(ebook_file):
    """Read metadata and the cover image of *ebook_file* with ``ebook-meta``.

    Every ``key: value`` line printed by ``ebook-meta`` becomes a dictionary
    entry. The cover is written to ``<temp dir>/<ebook file name>.jpg``.

    Returns:
        tuple: ``(cover_path_or_None, metadata_dict)``. If ``ebook-meta``
        cannot be run at all the result is ``(None, {})``. If only the cover
        extraction fails, the metadata that was parsed is still returned.
    """
    import tempfile

    try:
        metadata_output = subprocess.run(
            ['ebook-meta', ebook_file], capture_output=True, text=True
        ).stdout
    except Exception as e:
        print(f"Error extracting eBook metadata: {e}")
        return None, {}

    metadata = {}
    for line in metadata_output.splitlines():
        if ':' in line:
            key, value = line.split(':', 1)
            metadata[key.strip()] = value.strip()

    # Use the platform's temp directory rather than a hard-coded /tmp.
    output_image = os.path.join(tempfile.gettempdir(), os.path.basename(ebook_file) + '.jpg')
    try:
        subprocess.run(['ebook-meta', ebook_file, '--get-cover', output_image], check=True)
    except Exception as e:
        # A missing cover should not throw away the metadata already parsed.
        print(f"Error extracting eBook metadata: {e}")
        return None, metadata

    if not os.path.exists(output_image):
        output_image = None
    return output_image, metadata
def generate_chapter_metadata(wav_files, metadata_filename):
    """Write an FFMETADATA1 chapter list for *wav_files* to *metadata_filename*.

    One ``[CHAPTER]`` entry is written per file, in order, titled
    ``Chapter NN`` (numbered from 01). Start and end offsets are cumulative
    and use the length reported by ``AudioSegment.from_wav`` with a
    ``1/1000`` timebase.
    """
    with open(metadata_filename, 'w') as file:
        file.write(";FFMETADATA1\n")
        cursor = 0
        for chapter_number, wav_file in enumerate(wav_files, start=1):
            chapter_end = cursor + len(AudioSegment.from_wav(wav_file))
            file.write(f"[CHAPTER]\nTIMEBASE=1/1000\nSTART={cursor}\nEND={chapter_end}\ntitle=Chapter {chapter_number:02d}\n")
            cursor = chapter_end
def combine_wav_to_m4b_ffmpeg(wav_files, m4b_filename, cover_image, metadata_filename, metadata):
    """Concatenate *wav_files* and encode them as an M4B with FFmpeg.

    The files are first joined into ``combined.wav`` with the concat demuxer.
    That file is then encoded to AAC (192k) with the chapter list from
    *metadata_filename*, the tags in *metadata* and, when given, *cover_image*
    as attached picture. The second step only runs if the first succeeded.

    ``file_list.txt``, ``combined.wav``, *metadata_filename* and *cover_image*
    are deleted afterwards, whether or not FFmpeg succeeded.
    """
    print("Combining WAV files into an M4B audiobook using FFmpeg...")
    list_filename = 'file_list.txt'
    combined_wav = 'combined.wav'

    try:
        with open(list_filename, 'w') as file:
            for wav_file in wav_files:
                # Single quotes inside a quoted concat entry are written as '\''.
                escaped_path = wav_file.replace("'", "'\\''")
                file.write(f"file '{escaped_path}'\n")

        # -y: combined.wav is a scratch file; never block on a stale copy.
        concat_cmd = ['ffmpeg', '-y', '-f', 'concat', '-safe', '0', '-i', list_filename, '-c', 'copy', combined_wav]

        encode_cmd = ['ffmpeg', '-i', combined_wav, '-i', metadata_filename]
        if cover_image:
            encode_cmd += ['-i', cover_image]
        for key, value in metadata.items():
            encode_cmd += ['-metadata', f"{key}={value}"]
        encode_cmd += ['-map_metadata', '1']
        if cover_image:
            encode_cmd += ['-map', '0', '-map', '2']
        encode_cmd += ['-c:a', 'aac', '-b:a', '192k']
        if cover_image:
            encode_cmd += ['-c:v', 'copy', '-disposition:v:0', 'attached_pic']
        encode_cmd.append(m4b_filename)

        # Argument lists, not a shell string: metadata values and paths come
        # from the ebook and must not be interpreted by a shell.
        try:
            succeeded = all(subprocess.run(cmd).returncode == 0 for cmd in (concat_cmd, encode_cmd))
        except OSError as e:
            print(f"Could not run FFmpeg: {e}")
            succeeded = False

        if succeeded:
            print(f"M4B audiobook created: {m4b_filename}")
        else:
            print(f"FFmpeg failed; M4B audiobook was not created: {m4b_filename}")
    finally:
        for leftover in (list_filename, combined_wav, metadata_filename, cover_image):
            if leftover and os.path.exists(leftover):
                os.remove(leftover)
def convert_all_wav_to_m4b(input_dir, ebook_file, output_dir, audiobook_name):
    """Build one M4B audiobook from every ``.wav`` file in *input_dir*.

    Tags and cover come from ``extract_ebook_metadata(ebook_file)``. The WAV
    files are ordered with ``sort_chapters``, a chapter list is written with
    ``generate_chapter_metadata`` and the result is encoded by
    ``combine_wav_to_m4b_ffmpeg``. The output is
    ``output_dir/<title>.m4b``, falling back to *audiobook_name* when the
    ebook has no title. *output_dir* is created when missing.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        print(f"Created output directory: {output_dir}")

    cover_image, ebook_metadata = extract_ebook_metadata(ebook_file)
    wav_files = sorted(
        (os.path.join(input_dir, f) for f in os.listdir(input_dir) if f.endswith('.wav')),
        key=sort_chapters,
    )
    metadata_filename = 'chapter_metadata.txt'

    metadata = {
        'artist': ebook_metadata.get('Author(s)', 'Unknown Author'),
        'album': ebook_metadata.get('Series', 'Unknown Series'),
        'Title': ebook_metadata.get('Title', f'{audiobook_name}.m4b'),
        'date': ebook_metadata.get('Published', 'Unknown Year'),
        'Genre': ebook_metadata.get('Tags', 'Unknown Genre'),
        # NOTE(review): the comment tag is filled from 'Tags', same as Genre;
        # confirm whether a description field was intended.
        'Comment': ebook_metadata.get('Tags', 'No description available.'),
    }

    # Fall back to the audiobook_name argument; the old default was the
    # literal text "audiobook_name".
    title = ebook_metadata.get('Title', audiobook_name)
    # Characters that are path separators or invalid in file names would send
    # the output somewhere else or make FFmpeg fail.
    safe_title = re.sub(r'[\\/:*?"<>|]', '_', title).strip() or audiobook_name
    m4b_filename = os.path.join(output_dir, f'{safe_title}.m4b')

    generate_chapter_metadata(wav_files, metadata_filename)
    combine_wav_to_m4b_ffmpeg(wav_files, m4b_filename, cover_image, metadata_filename, metadata)
input_dir = "Final_combined_output_audio" |
ebook_file = ebook_file_path |
output_dir = 'output_audiobooks' |
audiobook_name = os.path.splitext(os.path.basename(ebook_file))[0] |
convert_all_wav_to_m4b(input_dir, ebook_file, output_dir, audiobook_name) |
from moviepy.editor import * |
def convert_wav_to_mp4(wav_filename, mp4_filename):
    """
    Re-encodes one WAV file with the AAC codec and writes it to a new path.

    Args:
        wav_filename (str): Path of the WAV file to read.
        mp4_filename (str): Path of the file to write.
    """
    audio = AudioFileClip(wav_filename)
    try:
        audio.write_audiofile(mp4_filename, codec='aac')
    finally:
        # Release the clip's reader even if encoding fails, so the source WAV
        # is not left open (it is deleted right after conversion by the
        # batch converter).
        audio.close()
def convert_all_wav_to_mp4(output_dir="Final_combined_output_audio"):
    """
    Converts every '.wav' file in a folder to an '.mp4' file and deletes the WAV.

    Each converted file is written next to its source with only the trailing
    '.wav' extension swapped for '.mp4'. Files that do not end in '.wav' are
    left untouched.

    Args:
        output_dir (str): Folder to scan. Defaults to the folder the original
            implementation had hard-coded.
    """
    wav_suffix = '.wav'
    for wav_file in os.listdir(output_dir):
        if not wav_file.endswith(wav_suffix):
            continue
        wav_filename = os.path.join(output_dir, wav_file)
        # Swap only the trailing extension; str.replace would also rewrite a
        # '.wav' that happens to appear earlier in the file name.
        mp4_name = wav_file[:-len(wav_suffix)] + '.mp4'
        mp4_filename = os.path.join(output_dir, mp4_name)
        convert_wav_to_mp4(wav_filename, mp4_filename)
        print(f"{wav_filename} has been converted to {mp4_filename}.")
        os.remove(wav_filename)
        print(f"{wav_filename} has been deleted.")
# Convert the chapter WAV files to .mp4 before the cover-art step below.
convert_all_wav_to_mp4()
print("Adding Book Artwork to mp4 chapter files if calibre is installed")
import os |
import subprocess |
def extract_cover_image_calibre(ebook_file):
    """
    Extracts the cover image from an eBook file using Calibre's ebook-meta tool.

    The image is written to the system temporary directory as
    '<ebook file name>.jpg'.

    Args:
        ebook_file (str): The path to the eBook file.

    Returns:
        str: The path to the extracted cover image, or None if ebook-meta
        could not be run, exited with an error, or produced no image file.
    """
    import tempfile  # local import: only this helper needs it

    # Use the platform's temp directory instead of a hard-coded '/tmp', which
    # does not exist on every operating system.
    output_image = os.path.join(tempfile.gettempdir(), os.path.basename(ebook_file) + '.jpg')
    try:
        subprocess.run(['ebook-meta', ebook_file, '--get-cover', output_image], check=True)
    except (OSError, subprocess.SubprocessError) as e:
        # OSError covers a missing ebook-meta executable; SubprocessError
        # covers a non-zero exit status raised by check=True.
        print(f"Error extracting cover image: {e}")
        return None
    return output_image if os.path.exists(output_image) else None
def set_cover_to_mp4(cover_image, mp4_folder):
    """
    Sets the extracted cover image to all mp4 files in a specified folder.

    For each '.mp4' file, ffmpeg writes a copy with the image added to a
    temporary file next to it; the original is replaced only when ffmpeg
    succeeds. Files for which ffmpeg fails are left unchanged.

    Args:
        cover_image (str): The path to the cover image.
        mp4_folder (str): The path to the folder containing mp4 files.
    """
    if not cover_image or not os.path.exists(cover_image):
        print("Cover image not found.")
        return

    for file in os.listdir(mp4_folder):
        if not file.lower().endswith('.mp4'):
            continue
        mp4_path = os.path.join(mp4_folder, file)
        temp_path = f"{mp4_path}.temp.mp4"
        # An argument list (no shell) keeps file names containing quotes,
        # spaces or '$' intact. '-y' overwrites a stale temp file instead of
        # blocking on ffmpeg's interactive prompt.
        ffmpeg_cmd = [
            'ffmpeg', '-y',
            '-i', mp4_path,
            '-i', cover_image,
            '-map', '0', '-map', '1',
            '-c', 'copy',
            # The image is the first video stream of the output when the
            # chapter file carries audio only, so it is addressed as v:0
            # (matching the M4B command used elsewhere in this file).
            # NOTE(review): assumes the mp4 files have no video stream yet.
            '-disposition:v:0', 'attached_pic',
            temp_path,
        ]
        try:
            result = subprocess.run(ffmpeg_cmd)
        except OSError as e:
            # ffmpeg could not be started at all; no file can be processed.
            print(f"Could not run ffmpeg: {e}")
            return
        if result.returncode != 0 or not os.path.exists(temp_path):
            print(f"ffmpeg failed to add the cover to {mp4_path}; file left unchanged.")
            if os.path.exists(temp_path):
                os.remove(temp_path)
            continue
        # os.replace overwrites the destination on every platform, whereas
        # os.rename fails on Windows when the target already exists.
        os.replace(temp_path, mp4_path)
# Pull the cover out of the eBook with Calibre and pass it, together with the
# output folder, to the helper that attaches it to the mp4 files.
ebook_file, mp4_folder = ebook_file_path, OUTPUT_FOLDER
cover_image = extract_cover_image_calibre(ebook_file)
set_cover_to_mp4(cover_image=cover_image, mp4_folder=mp4_folder)