// Voice Workbench (voice crafting) panel logic. Runs in the Electron renderer and
// uses @electron/remote to access native file dialogs.
const er = require('@electron/remote')
const dialog = er.dialog

// Shared state for the workbench: the embedding currently being edited, the two
// reference embeddings (A/B), and the paths of the last synthesized preview files.
window.voiceWorkbenchState = {
    isInit: false,
    isStarted: false,
    currentAudioFilePath: undefined,
    newAudioFilePath: undefined,
    currentEmb: undefined,
    refAEmb: undefined,
    refBEmb: undefined,
}
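
// Initialize the workbench the first time it is opened (populate the dropdowns and
// default the language to English), and refresh the crafted voices list on every open.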
window.initVoiceWorkbench = () => {
    if (!window.voiceWorkbenchState.isInit) {
        window.voiceWorkbenchState.isInit = true
        window.refreshExistingCraftedVoices()
        window.initDropdowns()
        voiceWorkbenchLanguageDropdown.value = "en"
    }
    window.refreshExistingCraftedVoices()
}
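
// Rebuild the list of already crafted voices: every installed model that carries an
// embOverABaseModel reference, grouped per game and coloured with the game's primary theme colour.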
window.refreshExistingCraftedVoices = () => {
    voiceWorkbenchVoicesList.innerHTML = ""
    Object.keys(window.games).sort((a,b)=>a>b?1:-1).forEach(gameId => {
        if (Object.keys(window.gameAssets).includes(gameId)) {
            const themeColour = window.gameAssets[gameId].themeColourPrimary
            window.games[gameId].models.forEach(model => {
                if (model.embOverABaseModel) {
                    const button = createElem("div.voiceType", model.voiceName)
                    button.style.background = `#${themeColour}`
                    button.addEventListener("click", () => window.voiceWorkbenchLoadOrResetCraftedVoice(model))
                    voiceWorkbenchVoicesList.appendChild(button)
                }
            })
        }
    })
}
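
// Populate the form from an existing crafted voice, or reset it to defaults when no
// model is given. Loading an existing voice also auto-starts the editing session.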
window.voiceWorkbenchLoadOrResetCraftedVoice = (model) => {
    voiceWorkbenchModelDropdown.value = model ? model.embOverABaseModel : "<base>/base_v1.0"
    voiceWorkbenchVoiceNameInput.value = model ? model.voiceName : ""
    voiceWorkbenchVoiceIDInput.value = model ? model.variants[0].voiceId : ""
    voiceWorkbenchGenderDropdown.value = model ? model.variants[0].gender : "male"
    voiceWorkbenchAuthorInput.value = model ? (model.variants[0].author || "Anonymous") : ""
    voiceWorkbenchLanguageDropdown.value = model ? model.variants[0].lang : "en"
    voiceWorkbenchGamesDropdown.value = model ? model.gameId : "other"
    voiceWorkbenchCurrentEmbeddingInput.value = model ? model.variants[0].base_speaker_emb : ""
    window.voiceWorkbenchState.currentEmb = model ? model.variants[0].base_speaker_emb : undefined

    window.voiceWorkbenchState.currentAudioFilePath = undefined
    window.voiceWorkbenchState.newAudioFilePath = undefined
    window.voiceWorkbenchState.refAEmb = undefined
    window.voiceWorkbenchState.refBEmb = undefined
    voiceWorkbenchRefAInput.value = ""
    voiceWorkbenchRefBInput.value = ""

    if (model) {
        voiceWorkbenchDeleteButton.disabled = false
        voiceWorkbenchStartButton.click()
    }
}
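
// Synthesize preview audio for the current embedding (and, if a delta is pending, for
// the prospective new embedding as well), loading the selected base model first if needed.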
window.voiceWorkbenchGenerateVoice = async () => {
    if (!voiceWorkbenchCurrentEmbeddingInput.value.length) {
        return window.errorModal(window.i18n.ENTER_VOICE_CRAFTING_STARTING_EMB)
    }

    // Load the model if it hasn't been loaded already
    let voiceId = voiceWorkbenchModelDropdown.value.split("/").at(-1)
    if (!window.currentModel || window.currentModel.voiceId!=voiceId) {
        let modelPath
        if (voiceId.includes("Base xVAPitch Model")) {
            modelPath = `${window.path}/python/xvapitch/base_v1.0.pt`
        } else {
            const gameId = voiceWorkbenchModelDropdown.value.split("/").at(0)
            modelPath = window.userSettings[`modelspath_${gameId}`]+"/"+voiceId
        }
        await window.voiceWorkbenchChangeModel(modelPath, voiceId)
    }

    const base_lang = voiceWorkbenchLanguageDropdown.value //voiceId.includes("Base xVAPitch Model") ? "en" : window.currentModel.lang

    // const currentEmbedding = voiceWorkbenchCurrentEmbeddingInput.value.split(",").map(v=>parseFloat(v))
    const currentEmbedding = window.voiceWorkbenchState.currentEmb
    // const currentDelta = voiceWorkbenchCurrentDeltaInput.value.split(",").map(v=>parseFloat(v))
    const newEmbedding = window.getVoiceWorkbenchNewEmbedding()

    const tempFileNum = `${Math.random().toString().split(".")[1]}`
    const currentTempFileLocation = `${path}/output/temp-${tempFileNum}_current.wav`
    const newTempFileLocation = `${path}/output/temp-${tempFileNum}_new.wav`

    // Do the current embedding first
    const synthRequests = []
    synthRequests.push(doSynth(JSON.stringify({
        sequence: voiceWorkbenchInputTextArea.value.trim(),
        useCleanup: true, // TODO, user setting?
        base_lang, base_emb: currentEmbedding.join(","), outfile: currentTempFileLocation
    })))

    let doingNewAudioFile = false
    if (voiceWorkbenchCurrentDeltaInput.value.length) {
        doingNewAudioFile = true
        synthRequests.push(doSynth(JSON.stringify({
            sequence: voiceWorkbenchInputTextArea.value.trim(),
            useCleanup: true, // TODO, user setting?
            base_lang, base_emb: newEmbedding.join(","), outfile: newTempFileLocation
        })))
    }

    // toggleSpinnerButtons()
    spinnerModal(`${window.i18n.SYNTHESIZING}`)
    Promise.all(synthRequests).then(res => {
        closeModal(undefined, [workbenchContainer])
        window.voiceWorkbenchState.currentAudioFilePath = currentTempFileLocation
        voiceWorkbenchAudioCurrentPlayPauseBtn.disabled = false
        voiceWorkbenchAudioCurrentSaveBtn.disabled = false
        if (doingNewAudioFile) {
            window.voiceWorkbenchState.newAudioFilePath = newTempFileLocation
            voiceWorkbenchAudioNewPlayBtn.disabled = false
            voiceWorkbenchAudioNewSaveBtn.disabled = false
        }
    })
}
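
// POST a (stringified) synthesis request to the local Python synthesis server and resolve
// with the response text; the preview controls are only enabled after this resolves, so the
// requested outfile is expected to exist by then.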
const doSynth = (body) => {
    return new Promise(resolve => {
        doFetch("http://localhost:8008/synthesizeSimple", {
            method: "POST",
            body
        }).then(r=>r.text()).then(resolve)
    })
}
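
// Compute the prospective new embedding by adding the current delta (element-wise) to the
// current embedding. Both are expected to be arrays of floats of the same length.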
window.getVoiceWorkbenchNewEmbedding = () => {
    const currentDelta = voiceWorkbenchCurrentDeltaInput.value.split(",").map(v=>parseFloat(v))
    const newEmb = window.voiceWorkbenchState.currentEmb.map((v,vi) => {
        return v + currentDelta[vi] //*strength
    })
    return newEmb
}
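
// Point the app at a different base model for synthesis: set window.currentModel to an
// xVAPitch model descriptor for the given path/ID and trigger the usual model load.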
window.voiceWorkbenchChangeModel = (modelPath, voiceId) => {
    window.currentModel = {
        outputs: undefined,
        model: modelPath.replace(".pt", ""),
        modelType: "xVAPitch",
        base_lang: voiceWorkbenchLanguageDropdown.value,
        isBaseModel: true,
        voiceId: voiceId
    }
    generateVoiceButton.dataset.modelQuery = JSON.stringify(window.currentModel)
    return window.loadModel()
}
voiceWorkbenchGenerateSampleButton.addEventListener("click", window.voiceWorkbenchGenerateVoice)
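
// Populate the games dropdown (only games with installed assets) and the base model
// dropdown (only v3/xVAPitch voices that aren't themselves crafted voices).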
window.initDropdowns = () => {
    // Games dropdown
    Object.keys(window.games).sort((a,b)=>a>b?1:-1).forEach(gameId => {
        if (gameId!="other") {
            if (Object.keys(window.gameAssets).includes(gameId)) {
                const gameName = window.games[gameId].gameTheme.gameName
                const option = createElem("option", gameName)
                option.value = gameId
                voiceWorkbenchGamesDropdown.appendChild(option)
            }
        }
    })

    // Models dropdown
    Object.keys(window.games).forEach(gameId => {
        if (window.games[gameId].gameTheme) {
            const gameName = window.games[gameId].gameTheme.gameName
            window.games[gameId].models.forEach(modelMeta => {
                const voiceName = modelMeta.voiceName
                const voiceId = modelMeta.variants[0].voiceId
                // Variants are not supported by v3 models, so pick the first one only. Also, filter out crafted voices
                if (modelMeta.variants[0].modelType=="xVAPitch" && !modelMeta.embOverABaseModel) {
                    const option = createElem("option", `[${gameName}] ${voiceName}`)
                    option.value = `${gameId}/${voiceId}`
                    voiceWorkbenchModelDropdown.appendChild(option)
                }
            })
        }
    })
}
// Change the available languages when the model is changed
voiceWorkbenchModelDropdown.addEventListener("change", () => {
    let voiceId = voiceWorkbenchModelDropdown.value.split("/").at(-1)
    if (voiceId.includes("base_v1.0")) {
        window.populateLanguagesDropdownsFromModel(voiceWorkbenchLanguageDropdown)
        voiceWorkbenchLanguageDropdown.value = "en"
    } else {
        const gameId = voiceWorkbenchModelDropdown.value.split("/")[0]
        if (Object.keys(window.games).includes(gameId)) {
            const baseModelData = window.games[gameId].models.filter(model => {
                return model.variants[0].voiceId == voiceWorkbenchModelDropdown.value.split("/").at(-1)
            })[0]
            window.populateLanguagesDropdownsFromModel(voiceWorkbenchLanguageDropdown, baseModelData)
            voiceWorkbenchLanguageDropdown.value = baseModelData.variants[0].lang
        }
    }
})
voiceWorkbenchStartButton.addEventListener("click", () => {
    window.voiceWorkbenchState.isStarted = true
    voiceWorkbenchLoadedContent.style.display = "flex"
    voiceWorkbenchLoadedContent2.style.display = "flex"
    voiceWorkbenchStartButton.style.display = "none"

    // Load the base model's embedding as a starting point, if it's not the built-in base model
    let voiceId = voiceWorkbenchModelDropdown.value.split("/").at(-1)
    if (!voiceId.includes("base_v1.0")) {
        const gameId = voiceWorkbenchModelDropdown.value.split("/")[0]
        if (Object.keys(window.games).includes(gameId)) {
            const baseModelData = window.games[gameId].models.filter(model => {
                return model.variants[0].voiceId == voiceWorkbenchModelDropdown.value.split("/").at(-1)
            })[0]
            voiceWorkbenchCurrentEmbeddingInput.value = baseModelData.variants[0].base_speaker_emb.join(",")
            window.voiceWorkbenchState.currentEmb = baseModelData.variants[0].base_speaker_emb
        }
    }
})
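
// Turn a container into a drag-and-drop target for a single .wav file. On drop, the speaker
// embedding is extracted from the file, written into inputField, and the optional callback
// is invoked with the (forward-slash normalized) file path.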
window.setupVoiceWorkbenchDropArea = (container, inputField, callback=undefined) => {
    const dropFn = (eType, event) => {
        if (["dragenter", "dragover"].includes(eType)) {
            container.style.background = "#5b5b5b"
            container.style.color = "white"
        }
        if (["dragleave", "drop"].includes(eType)) {
            container.style.background = "rgba(0,0,0,0)"
            container.style.color = "white"
        }

        event.preventDefault()
        event.stopPropagation()

        if (eType=="drop") {
            const dataTransfer = event.dataTransfer
            const files = Array.from(dataTransfer.files)
            if (files[0].path.endsWith(".wav")) {
                const filePath = String(files[0].path).replaceAll(/\\/g, "/")
                console.log("filePath", filePath)
                window.getSpeakerEmbeddingFromFilePath(filePath).then(embedding => {
                    inputField.value = embedding
                    if (callback) {
                        callback(filePath)
                    }
                })
            } else {
                window.errorModal(window.i18n.ERROR_FILE_MUST_BE_WAV)
            }
        }
    }
    container.addEventListener("dragenter", event => dropFn("dragenter", event), false)
    container.addEventListener("dragleave", event => dropFn("dragleave", event), false)
    container.addEventListener("dragover", event => dropFn("dragover", event), false)
    container.addEventListener("drop", event => dropFn("drop", event), false)
}
window.setupVoiceWorkbenchDropArea(voiceWorkbenchCurrentEmbeddingDropzone, voiceWorkbenchCurrentEmbeddingInput, () => {
    window.voiceWorkbenchState.currentEmb = voiceWorkbenchCurrentEmbeddingInput.value.split(",").map(v=>parseFloat(v))
})
voiceWorkbenchCurrentEmbeddingInput.addEventListener("change", ()=>{
    window.voiceWorkbenchState.currentEmb = voiceWorkbenchCurrentEmbeddingInput.value.split(",").map(v=>parseFloat(v))
})
window.setupVoiceWorkbenchDropArea(voiceWorkbenchRefADropzone, voiceWorkbenchRefAInput, (filePath) => {
    voiceWorkbenchRefAFilePath.innerHTML = window.i18n.FROM_FILE_IS_FILEPATH.replace("_1", filePath)
    voiceWorkshopApplyDeltaButton.disabled = false
    window.voiceWorkbenchState.refAEmb = voiceWorkbenchRefAInput.value.split(",").map(v=>parseFloat(v))
    window.voiceWorkbenchUpdateDelta()
})
window.setupVoiceWorkbenchDropArea(voiceWorkbenchRefBDropzone, voiceWorkbenchRefBInput, (filePath) => {
    voiceWorkbenchRefBFilePath.innerHTML = window.i18n.FROM_FILE_IS_FILEPATH.replace("_1", filePath)
    window.voiceWorkbenchState.refBEmb = voiceWorkbenchRefBInput.value.split(",").map(v=>parseFloat(v))
    window.voiceWorkbenchUpdateDelta()
})

voiceWorkbenchInputTextArea.addEventListener("keyup", () => {
    voiceWorkbenchGenerateSampleButton.disabled = voiceWorkbenchInputTextArea.value.trim().length==0
})

voiceWorkbenchAudioCurrentPlayPauseBtn.addEventListener("click", () => {
    const audioPreview = createElem("audio", {autoplay: false}, createElem("source", {
        src: window.voiceWorkbenchState.currentAudioFilePath
    }))
    audioPreview.setSinkId(window.userSettings.base_speaker)
})
voiceWorkbenchAudioCurrentSaveBtn.addEventListener("click", async () => {
    const userChosenPath = await dialog.showSaveDialog({ defaultPath: window.voiceWorkbenchState.currentAudioFilePath })
    if (userChosenPath && userChosenPath.filePath) {
        const outFilePath = userChosenPath.filePath.split(".").at(-1)=="wav" ? userChosenPath.filePath : userChosenPath.filePath+".wav"
        fs.copyFileSync(window.voiceWorkbenchState.currentAudioFilePath, outFilePath)
    }
})
voiceWorkbenchAudioNewPlayBtn.addEventListener("click", () => {
    const audioPreview = createElem("audio", {autoplay: false}, createElem("source", {
        src: window.voiceWorkbenchState.newAudioFilePath
    }))
    audioPreview.setSinkId(window.userSettings.base_speaker)
})
voiceWorkbenchAudioNewSaveBtn.addEventListener("click", async () => {
    const userChosenPath = await dialog.showSaveDialog({ defaultPath: window.voiceWorkbenchState.newAudioFilePath })
    if (userChosenPath && userChosenPath.filePath) {
        const outFilePath = userChosenPath.filePath.split(".").at(-1)=="wav" ? userChosenPath.filePath : userChosenPath.filePath+".wav"
        fs.copyFileSync(window.voiceWorkbenchState.newAudioFilePath, outFilePath)
    }
})
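
// Recompute the delta shown in voiceWorkbenchCurrentDeltaInput from the reference
// embeddings and the strength value. Does nothing until reference file A has been given.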
window.voiceWorkbenchUpdateDelta = () => {
    // Don't do anything if reference file A isn't given
    if (!window.voiceWorkbenchState.refAEmb) {
        return
    }

    const strengthValue = parseFloat(voiceWorkbenchStrengthInput.value)
    let delta

    // When only Ref A is used, the delta is from <current> towards the reference file A
    if (window.voiceWorkbenchState.refBEmb == undefined) {
        delta = window.voiceWorkbenchState.currentEmb.map((v,vi) => {
            return (window.voiceWorkbenchState.refAEmb[vi] - v) * strengthValue
        })
    } else {
        // When Ref B is also used, the delta is from Ref A to Ref B
        delta = window.voiceWorkbenchState.refAEmb.map((v,vi) => {
            return (window.voiceWorkbenchState.refBEmb[vi] - v) * strengthValue
        })
    }

    voiceWorkbenchCurrentDeltaInput.value = delta.join(",")
}
voiceWorkbenchStrengthSlider.addEventListener("change", () => {
    voiceWorkbenchStrengthInput.value = voiceWorkbenchStrengthSlider.value
    window.voiceWorkbenchUpdateDelta()
})
voiceWorkbenchStrengthInput.addEventListener("change", () => {
    voiceWorkbenchStrengthSlider.value = voiceWorkbenchStrengthInput.value
    window.voiceWorkbenchUpdateDelta()
})
voiceWorkshopApplyDeltaButton.addEventListener("click", () => {
    if (voiceWorkbenchCurrentDeltaInput.value.length) {
        const newEmb = window.getVoiceWorkbenchNewEmbedding()
        window.voiceWorkbenchState.currentEmb = newEmb
        voiceWorkbenchCurrentEmbeddingInput.value = newEmb.join(",")
        voiceWorkbenchCurrentDeltaInput.value = ""
        voiceWorkshopApplyDeltaButton.disabled = true
        voiceWorkbenchRefAInput.value = ""
        window.voiceWorkbenchState.refAEmb = undefined
        voiceWorkbenchRefBInput.value = ""
        window.voiceWorkbenchState.refBEmb = undefined
    }
})
/*
    Drop file A over the reference audio file A area to get its embedding.
    When only reference file A is given, the current delta is the direction from <current>
    towards A (A minus <current>), multiplied by the strength.
    Drop file B over the B area to get a second embedding.
    When both A and B are given, the current delta is the direction from A to B
    (B minus A, rather than A minus <current>), multiplied by the strength.
*/
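// Illustrative example with made-up 2D values: if currentEmb=[0.2, 0.5], refAEmb=[0.6, 0.1]
// and strength=0.5, then with only Ref A given the delta is [(0.6-0.2)*0.5, (0.1-0.5)*0.5]
// = [0.2, -0.2], and applying it yields a new embedding of [0.4, 0.3].

// Save the crafted voice: write a v3 model .json (with the current embedding inlined) into
// the selected game's models folder, synthesize a short preview .wav next to it, then reload
// the models so the new voice shows up immediately.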
voiceWorkbenchSaveButton.addEventListener("click", () => {
    const voiceName = voiceWorkbenchVoiceNameInput.value
    const voiceId = voiceWorkbenchVoiceIDInput.value
    const gender = voiceWorkbenchGenderDropdown.value
    const author = voiceWorkbenchAuthorInput.value || "Anonymous"
    const lang = voiceWorkbenchLanguageDropdown.value

    if (!voiceName.trim().length) {
        return window.errorModal(window.i18n.ENTER_VOICE_NAME)
    }
    if (!voiceId.trim().length) {
        return window.errorModal(window.i18n.ENTER_VOICE_ID)
    }

    const modelJson = {
        "version": "3.0",
        "modelVersion": "3.0",
        "modelType": "xVAPitch",
        "author": author,
        "lang": lang,
        "embOverABaseModel": voiceWorkbenchModelDropdown.value,
        "games": [
            {
                "gameId": voiceWorkbenchGamesDropdown.value,
                "voiceId": voiceId,
                "variant": "Default",
                "voiceName": voiceName,
                "base_speaker_emb": window.voiceWorkbenchState.currentEmb,
                "gender": gender
            }
        ]
    }

    const gameModelsPath = `${window.userSettings[`modelspath_${voiceWorkbenchGamesDropdown.value}`]}`
    const jsonDestination = `${gameModelsPath}/${voiceId}.json`
    fs.writeFileSync(jsonDestination, JSON.stringify(modelJson, null, 4))

    doSynth(JSON.stringify({
        sequence: " This is what my voice sounds like. ",
        useCleanup: true, // TODO, user setting?
        base_lang: lang,
        base_emb: window.voiceWorkbenchState.currentEmb.join(","), outfile: jsonDestination.replace(".json", ".wav")
    })).then(() => {
        voiceWorkbenchDeleteButton.disabled = false
        window.currentModel = undefined
        generateVoiceButton.dataset.modelQuery = null
        window.infoModal(window.i18n.VOICE_CREATED_AT.replace("_1", jsonDestination))

        // Clean up the temp file from the clean-up post-processing, if it exists
        if (fs.existsSync(jsonDestination.replace(".json", "_preCleanup.wav"))) {
            fs.unlinkSync(jsonDestination.replace(".json", "_preCleanup.wav"))
        }

        window.loadAllModels().then(() => {
            window.refreshExistingCraftedVoices()
            // Refresh the main page's voice list if the currently loaded game is the one the voice was saved into
            if (window.currentGame.gameId==voiceWorkbenchGamesDropdown.value) {
                window.changeGame(window.currentGame)
                window.refreshExistingCraftedVoices()
            }
        })
    })
})
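
// Enable the delete button only when a crafted-voice .json already exists for the currently
// selected game and voice ID combination.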
voiceWorkbenchGamesDropdown.addEventListener("change", () => {
    const gameModelsPath = `${window.userSettings[`modelspath_${voiceWorkbenchGamesDropdown.value}`]}`
    const voiceId = voiceWorkbenchVoiceIDInput.value
    const jsonLocation = `${gameModelsPath}/${voiceId}.json`
    voiceWorkbenchDeleteButton.disabled = !fs.existsSync(jsonLocation)
})
voiceWorkbenchVoiceIDInput.addEventListener("change", () => {
    const gameModelsPath = `${window.userSettings[`modelspath_${voiceWorkbenchGamesDropdown.value}`]}`
    const voiceId = voiceWorkbenchVoiceIDInput.value
    const jsonLocation = `${gameModelsPath}/${voiceId}.json`
    voiceWorkbenchDeleteButton.disabled = !fs.existsSync(jsonLocation)
})
voiceWorkbenchDeleteButton.addEventListener("click", () => {
    const gameModelsPath = `${window.userSettings[`modelspath_${voiceWorkbenchGamesDropdown.value}`]}`
    const voiceId = voiceWorkbenchVoiceIDInput.value
    const jsonLocation = `${gameModelsPath}/${voiceId}.json`

    window.confirmModal(window.i18n.CONFIRM_DELETE_CRAFTED_VOICE.replace("_1", voiceWorkbenchVoiceNameInput.value).replace("_2", jsonLocation)).then(resp => {
        // Only delete (and report success) if the user confirmed
        if (!resp) {
            return
        }
        if (fs.existsSync(jsonLocation.replace(".json", ".wav"))) {
            fs.unlinkSync(jsonLocation.replace(".json", ".wav"))
        }
        fs.unlinkSync(jsonLocation)

        window.infoModal(window.i18n.SUCCESSFULLY_DELETED_CRAFTED_VOICE)
        window.loadAllModels().then(() => {
            // Refresh the main page's voice list if the currently loaded game is the one the voice was deleted from
            if (window.currentGame.gameId==voiceWorkbenchGamesDropdown.value) {
                window.changeGame(window.currentGame)
                window.refreshExistingCraftedVoices()
            }
            voiceWorkbenchCancelButton.click()
        })
    })
})
voiceWorkbenchCancelButton.addEventListener("click", () => {
    window.voiceWorkbenchState.isStarted = false
    voiceWorkbenchLoadedContent.style.display = "none"
    voiceWorkbenchLoadedContent2.style.display = "none"
    voiceWorkbenchStartButton.style.display = "flex"
    window.voiceWorkbenchLoadOrResetCraftedVoice()
})