"""Build a Piper-style training set (metadata.csv + wav/) from game dialog.

The script exposes two Typer commands: one that pairs transcribed dialog
lines with their voice files and copies them into an output directory, and
one that recursively lowercases the names inside a voice directory.
"""

import json
import shutil
from dataclasses import dataclass
from pathlib import Path, PureWindowsPath
from typing import Annotated

import typer
from rich.progress import track

app = typer.Typer()


@dataclass
class TranscribedDialog:
    """A voice clip paired with the text that is spoken in it."""

    # Location of the audio clip; the file is not guaranteed to exist.
    voice_file: Path
    # Text spoken in the clip. NOTE: the misspelled name is kept on purpose,
    # it is part of this class's public interface.
    transscript: str

    @property
    def exists(self) -> bool:
        """Return True when the voice file is present as a regular file."""
        # is_file() rather than exists(): a directory can never be copied as
        # a clip, so it must not count as an available voice file.
        return self.voice_file.is_file()

    @property
    def is_valid(self) -> bool:
        """Return True when the clip exists and its transcript is not blank."""
        text = self.transscript
        return self.exists and bool(text and text.strip())


def parse_dialogs(dialogs_file: Path, voice_name_filter: str) -> list[dict]:
    """Load the dialog JSON file and keep the entries of one voice.

    Args:
        dialogs_file: JSON file containing a list of dialog objects.
        voice_name_filter: Value the "Voice" key must equal to be kept.

    Returns:
        The dialog objects whose "Voice" equals ``voice_name_filter``.
        Entries without a "Voice" key are skipped.
    """
    # utf-8-sig decodes plain UTF-8 as well as UTF-8 with a BOM, which
    # json.loads would otherwise reject.
    dialogs = json.loads(dialogs_file.read_text(encoding="utf-8-sig"))
    return [entry for entry in dialogs if entry.get("Voice") == voice_name_filter]


def create_transcribed_dialog(dialog: dict, voice_directory: Path) -> TranscribedDialog:
    """Resolve a dialog entry to its ``.wav`` file below ``voice_directory``.

    Args:
        dialog: Dialog object with a "Voice File" key (Windows-style relative
            path) and, optionally, a "Response" key holding the transcript.
        voice_directory: Directory the voice file reference is relative to.

    Returns:
        A TranscribedDialog. A missing or null "Response" becomes an empty
        transcript, which makes the dialog invalid instead of raising.

    Raises:
        KeyError: If the dialog has no "Voice File" key.
    """
    original_voice_reference: str = dialog["Voice File"]
    # Turn real newline / carriage-return characters back into the two
    # character sequences "\n" / "\r". Presumably the backslashes of the
    # Windows path were not escaped when the CSV was converted to JSON.
    repaired_reference = original_voice_reference.replace("\n", "\\n").replace(
        "\r", "\\r"
    )
    # as_posix() makes the separator conversion explicit, so the result is
    # the same on every platform and Python version.
    relative_voice_path = Path(PureWindowsPath(repaired_reference).as_posix())
    if relative_voice_path.name:
        # with_suffix() raises ValueError on a path without a name (empty
        # reference); such a dialog simply ends up invalid instead.
        relative_voice_path = relative_voice_path.with_suffix(".wav")
    return TranscribedDialog(
        voice_file=voice_directory / relative_voice_path,
        transscript=dialog.get("Response") or "",
    )


def _sanitize_transcript(text: str) -> str:
    """Return ``text`` as a single line that is safe for an ``id|text`` row."""
    # "|" is the column separator and a line break would split the record,
    # so the former becomes ";" and the latter collapses into one space.
    lines = (line.strip() for line in text.replace("|", ";").splitlines())
    return " ".join(line for line in lines if line)


@app.command()
def create_training_data(
    dialog_file: Annotated[
        Path,
        typer.Argument(
            exists=True,
            file_okay=True,
            dir_okay=False,
            readable=True,
            resolve_path=True,
            help='csv from "https://www.nexusmods.com/fallout4/mods/7273" converted to json',
        ),
    ],
    voice_directory: Annotated[
        Path,
        typer.Argument(exists=True, file_okay=False, dir_okay=True, readable=True),
    ],
    voice_name: Annotated[
        str,
        typer.Argument(
            help='Name of the voice. Indicated as "voice" in the dialog json file'
        ),
    ],
    output_path: Annotated[
        Path,
        typer.Option(
            file_okay=False,
            dir_okay=True,
            writable=True,
            resolve_path=True,
            help="Output directory. Will contain metadata.csv and wav/ directory",
        ),
    ] = Path("piper-training-data"),
) -> None:
    """Create metadata.csv and a wav/ directory for one voice."""
    # Exit codes: 1 = uppercase name inside voice_directory, 2 = no dialog
    # for the requested voice, 3 = dialogs found but none of them usable.

    # Refuse to run while anything below voice_directory has an uppercase
    # letter in its name.
    for entry in voice_directory.rglob("*"):
        if entry.name != entry.name.lower():
            typer.echo(
                "Found file or directory with uppercase letters: "
                f"{entry.name}. Please rename to lowercase"
            )
            raise typer.Exit(code=1)

    filtered_dialogs = parse_dialogs(dialog_file, voice_name)
    if not filtered_dialogs:
        typer.echo(f"No dialogs found for voice {voice_name}")
        raise typer.Exit(code=2)
    typer.echo(f"Found {len(filtered_dialogs)} dialogs for voice {voice_name}")

    dialogs = [
        create_transcribed_dialog(dialog, voice_directory)
        for dialog in filtered_dialogs
    ]
    valid_dialogs = [dialog for dialog in dialogs if dialog.is_valid]
    # dialogs cannot be empty here (checked above), so the ratio is safe.
    typer.echo(
        f"Found {len(valid_dialogs)}/{len(dialogs)} ({len(valid_dialogs)/len(dialogs):.2%}) of dialogs to be valid for voice {voice_name}"
    )
    if not valid_dialogs:
        # Do not leave an empty, useless data set behind.
        typer.echo(f"No valid dialogs found for voice {voice_name}")
        raise typer.Exit(code=3)

    # Generate the metadata file and copy the clips next to it; clip <i>.wav
    # belongs to the metadata row that starts with "<i>|".
    metadata_file = output_path / "metadata.csv"
    wav_directory = output_path / "wav"
    wav_directory.mkdir(exist_ok=True, parents=True)
    with metadata_file.open("w", encoding="utf-8") as f:
        for i, valid_dialog in track(
            enumerate(valid_dialogs),
            description="Generating training data...",
            total=len(valid_dialogs),
        ):
            f.write(f"{i}|{_sanitize_transcript(valid_dialog.transscript)}\n")
            shutil.copy(valid_dialog.voice_file, wav_directory / f"{i}.wav")


def walk_and_rename_to_lower_case(directory: Path) -> None:
    """Recursively rename everything below ``directory`` to lowercase.

    ``directory`` itself keeps its name.

    Raises:
        FileExistsError: If the lowercase name is already taken by a
            different file or directory, which would otherwise be replaced.
    """
    # Snapshot the listing first: the directory is modified while we walk it.
    for entry in list(directory.iterdir()):
        lowered_name = entry.name.lower()
        # Compare the names as strings; Windows paths compare
        # case-insensitively and would never look different.
        if lowered_name != entry.name:
            target = entry.with_name(lowered_name)
            # On a case-insensitive file system the target "exists" but is
            # the entry itself, which is fine to rename.
            if target.exists() and not target.samefile(entry):
                raise FileExistsError(
                    f"Cannot rename {entry} to {target}: target already exists"
                )
            entry = entry.rename(target)
        if entry.is_dir():
            walk_and_rename_to_lower_case(entry)


@app.command()
def rename_to_lower_case(
    voice_directory: Annotated[
        Path,
        typer.Argument(exists=True, file_okay=False, dir_okay=True, readable=True),
    ],
) -> None:
    """Rename every file and directory below VOICE_DIRECTORY to lowercase."""
    try:
        walk_and_rename_to_lower_case(voice_directory)
    except FileExistsError as error:
        # Report a name collision as a clean CLI error, not a traceback.
        typer.echo(str(error))
        raise typer.Exit(code=1) from error


if __name__ == "__main__":
    app()