voices
/

VCTK_European_English_Females

Inference Endpoints

Model card Files Files and versions Community

VCTK_European_English_Females / prepare_model.py

jvision's picture

first commit

46583ee over 1 year ago

history blame contribute delete

3.91 kB

	import ast
	import json
	import os
	import re
	import subprocess
	import sys

	# Speaker metadata keyed by speaker id.  Each row below is
	# (speaker id, age, gender, accent, region); the dict built from the rows
	# keeps this row order and the "age", "gender", "accents", "region" key
	# order, so serialising it produces the same document as a hand-written
	# nested literal would.
	_SPEAKER_ROWS = (
	    ("VCTK_p283", 24, "F", "Irish", "Cork"),
	    ("VCTK_p266", 22, "F", "Irish", "Athlone"),
	    ("VCTK_p288", 22, "F", "Irish", "Dublin"),
	    ("VCTK_p295", 23, "F", "Irish", "Dublin"),
	    ("VCTK_p293", 22, "F", "NorthernIrish", "Belfast"),
	    ("VCTK_p238", 22, "F", "NorthernIrish", "Belfast"),
	    ("VCTK_p261", 26, "F", "NorthernIrish", "Belfast"),
	    ("VCTK_p351", 21, "F", "NorthernIrish", "Derry"),
	    ("VCTK_p249", 22, "F", "Scottish", "Aberdeen"),
	    ("VCTK_p234", 22, "F", "Scottish", "West Dumfries"),
	    ("VCTK_p262", 23, "F", "Scottish", "Edinburgh"),
	    ("VCTK_p264", 23, "F", "Scottish", "West Lothian"),
	    ("VCTK_p265", 23, "F", "Scottish", "Ross"),
	    ("VCTK_p253", 22, "F", "Welsh", "Cardiff"),
	    ("VCTK_p313", 24, "F", "Irish", "County Down"),
	    ("VCTK_p340", 18, "F", "Irish", "Dublin"),
	    ("VCTK_p335", 25, "F", "NewZealand", "English"),
	    ("VCTK_p280", 25, "F", "France", "France"),
	)

	data = {
	    speaker: {"age": age, "gender": gender, "accents": accent, "region": region}
	    for speaker, age, gender, accent, region in _SPEAKER_ROWS
	}

	# Serialise the speaker table as 2-space-indented JSON text, then write that
	# text to speakers-log.json in the current working directory, replacing any
	# existing file of that name.
	json_data = json.dumps(data, indent=2)
	with open('speakers-log.json', mode='w') as log_file:
	    log_file.write(json_data)

	# Checkpoint and config passed to every `tts` invocation below.
	MODEL_PATH = "checkpoint_85000.pth"
	CONFIG_PATH = "config.json"

	# Directory the per-speaker sample WAV files are written to.
	SAMPLES_DIR = "samples"

	# Matches `tts` console lines that are not part of the speaker listing:
	# whitespace followed by "|" or ">", or a blank line.
	# NOTE(review): this filter used to be a `grep -vE` stage in a shell
	# pipeline, but the pipe and the alternation bars were backslash-escaped in
	# the command string, so the shell would have handed "|", "grep", ... to
	# `tts` as plain arguments.  The alternation below is the presumed intent -
	# confirm against real `tts --list_speaker_idxs` output.
	_LOG_LINE = re.compile(r"^(\s\||\s>|\s*$)")


	def parse_speaker_indices(listing):
	    """Return the Python literal contained in ``tts`` speaker-listing output.

	    Lines matching ``_LOG_LINE`` are dropped and the remaining text is parsed
	    with ``ast.literal_eval``, so only plain literals (dict, list, str, int,
	    ...) are accepted; unlike ``eval``, nothing in the output is executed.

	    Args:
	        listing: Captured standard output of ``tts --list_speaker_idxs``.

	    Returns:
	        The parsed literal.  Presumably a mapping of speaker id to index -
	        TODO confirm; the caller only iterates over it.

	    Raises:
	        ValueError, SyntaxError: If the remaining text is not a valid literal.
	    """
	    kept = [line for line in listing.splitlines() if not _LOG_LINE.match(line)]
	    return ast.literal_eval("\n".join(kept))


	def list_speaker_indices():
	    """Run ``tts --list_speaker_idxs`` for the checkpoint and parse its output.

	    The command is passed as an argument list, so no shell is involved.

	    Raises:
	        subprocess.CalledProcessError: If ``tts`` exits with a non-zero status.
	    """
	    listing = subprocess.check_output(
	        [
	            "tts",
	            "--model_path", MODEL_PATH,
	            "--config_path", CONFIG_PATH,
	            "--list_speaker_idxs",
	        ],
	        text=True,
	    )
	    return parse_speaker_indices(listing)


	def build_sample_command(speaker_id, region):
	    """Return the ``tts`` argument list that renders one speaker's sample.

	    Args:
	        speaker_id: Value passed to ``--speaker_idx``; also names the WAV file.
	        region: Region name spoken in the sample sentence.

	    Returns:
	        A list of arguments suitable for ``subprocess.run``.  Passing a list
	        rather than a shell string means quotes or shell metacharacters in
	        either argument reach ``tts`` verbatim.
	    """
	    text = f"Hello, I am from {region}. I hope that you will select my voice for your project. Thank you."
	    out_path = f"{SAMPLES_DIR}/{speaker_id}.wav"
	    return [
	        "tts",
	        "--text", text,
	        "--model_path", MODEL_PATH,
	        "--language_idx", "en",
	        "--config_path", CONFIG_PATH,
	        "--speaker_idx", speaker_id,
	        "--out_path", out_path,
	    ]


	def main():
	    """Render a sample WAV for every listed speaker that has metadata."""
	    speaker_indices = list_speaker_indices()

	    # Load the speaker metadata back from speakers-log.json.
	    with open('speakers-log.json', 'r') as file:
	        speaker_ids = json.load(file)

	    for speaker_id in speaker_indices:
	        speaker_meta = speaker_ids.get(speaker_id)
	        if speaker_meta is None:
	            # The model knows this speaker but there is no metadata for it.
	            continue

	        # Created lazily so the directory only appears once a sample is
	        # actually going to be written; exist_ok avoids a check-then-create race.
	        os.makedirs(SAMPLES_DIR, exist_ok=True)

	        completed = subprocess.run(build_sample_command(speaker_id, speaker_meta["region"]))
	        if completed.returncode != 0:
	            # Best effort: report the failure and carry on with the next speaker.
	            print(
	                f"tts exited with status {completed.returncode} for speaker {speaker_id}",
	                file=sys.stderr,
	            )


	if __name__ == "__main__":
	    main()