mrfakename
committed on
Commit
•
b0bca14
1
Parent(s):
1df5e0e
Sync from GitHub repo
Browse files
This Space is synced from the GitHub repo: https://github.com/SWivid/F5-TTS. Please submit contributions to the Space there.
src/f5_tts/api.py
CHANGED
@@ -15,6 +15,9 @@ from f5_tts.infer.utils_infer import (
|
|
15 |
infer_process,
|
16 |
remove_silence_for_generated_wav,
|
17 |
save_spectrogram,
|
|
|
|
|
|
|
18 |
)
|
19 |
|
20 |
|
@@ -31,10 +34,8 @@ class F5TTS:
|
|
31 |
):
|
32 |
# Initialize parameters
|
33 |
self.final_wave = None
|
34 |
-
self.target_sample_rate = 24000
|
35 |
-
self.n_mel_channels = 100
|
36 |
-
self.hop_length = 256
|
37 |
-
self.target_rms = 0.1
|
38 |
self.seed = -1
|
39 |
|
40 |
# Set device
|
@@ -97,6 +98,9 @@ class F5TTS:
|
|
97 |
seed = random.randint(0, sys.maxsize)
|
98 |
seed_everything(seed)
|
99 |
self.seed = seed
|
|
|
|
|
|
|
100 |
wav, sr, spect = infer_process(
|
101 |
ref_file,
|
102 |
ref_text,
|
|
|
15 |
infer_process,
|
16 |
remove_silence_for_generated_wav,
|
17 |
save_spectrogram,
|
18 |
+
preprocess_ref_audio_text,
|
19 |
+
target_sample_rate,
|
20 |
+
hop_length,
|
21 |
)
|
22 |
|
23 |
|
|
|
34 |
):
|
35 |
# Initialize parameters
|
36 |
self.final_wave = None
|
37 |
+
self.target_sample_rate = target_sample_rate
|
38 |
+
self.hop_length = hop_length
|
|
|
|
|
39 |
self.seed = -1
|
40 |
|
41 |
# Set device
|
|
|
98 |
seed = random.randint(0, sys.maxsize)
|
99 |
seed_everything(seed)
|
100 |
self.seed = seed
|
101 |
+
|
102 |
+
ref_file, ref_text = preprocess_ref_audio_text(ref_file, ref_text, device=self.device)
|
103 |
+
|
104 |
wav, sr, spect = infer_process(
|
105 |
ref_file,
|
106 |
ref_text,
|
src/f5_tts/infer/infer_cli.py
CHANGED
@@ -161,6 +161,8 @@ def main_process(ref_audio, ref_text, text_gen, model_obj, remove_silence, speed
|
|
161 |
chunks = re.split(reg1, text_gen)
|
162 |
reg2 = r"\[(\w+)\]"
|
163 |
for text in chunks:
|
|
|
|
|
164 |
match = re.match(reg2, text)
|
165 |
if match:
|
166 |
voice = match[1]
|
|
|
161 |
chunks = re.split(reg1, text_gen)
|
162 |
reg2 = r"\[(\w+)\]"
|
163 |
for text in chunks:
|
164 |
+
if not text.strip():
|
165 |
+
continue
|
166 |
match = re.match(reg2, text)
|
167 |
if match:
|
168 |
voice = match[1]
|
src/f5_tts/train/finetune_gradio.py
CHANGED
@@ -1216,7 +1216,7 @@ def infer(project, file_checkpoint, exp_name, ref_text, ref_audio, gen_text, nfe
|
|
1216 |
else:
|
1217 |
device_test = None
|
1218 |
|
1219 |
-
if last_checkpoint != file_checkpoint or last_device != device_test or last_ema != use_ema:
|
1220 |
if last_checkpoint != file_checkpoint:
|
1221 |
last_checkpoint = file_checkpoint
|
1222 |
|
|
|
1216 |
else:
|
1217 |
device_test = None
|
1218 |
|
1219 |
+
if last_checkpoint != file_checkpoint or last_device != device_test or last_ema != use_ema or tts_api is None:
|
1220 |
if last_checkpoint != file_checkpoint:
|
1221 |
last_checkpoint = file_checkpoint
|
1222 |
|