mrfakename
committed on
Commit
•
3f5b3b4
1
Parent(s):
3536c5f
Sync from GitHub repo
Browse files
This Space is synced from the GitHub repo: https://github.com/SWivid/F5-TTS. Please submit contributions to the Space there
src/f5_tts/infer/infer_cli.py
CHANGED
@@ -75,6 +75,12 @@ parser.add_argument(
|
|
75 |
action="store_true",
|
76 |
help="load vocoder from local. Default: ../checkpoints/charactr/vocos-mel-24khz",
|
77 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
args = parser.parse_args()
|
79 |
|
80 |
config = tomli.load(open(args.config, "rb"))
|
@@ -102,6 +108,7 @@ model = args.model if args.model else config["model"]
|
|
102 |
ckpt_file = args.ckpt_file if args.ckpt_file else ""
|
103 |
vocab_file = args.vocab_file if args.vocab_file else ""
|
104 |
remove_silence = args.remove_silence if args.remove_silence else config["remove_silence"]
|
|
|
105 |
wave_path = Path(output_dir) / "infer_cli_out.wav"
|
106 |
# spectrogram_path = Path(output_dir) / "infer_cli_out.png"
|
107 |
vocos_local_path = "../checkpoints/charactr/vocos-mel-24khz"
|
@@ -134,7 +141,7 @@ print(f"Using {model}...")
|
|
134 |
ema_model = load_model(model_cls, model_cfg, ckpt_file, vocab_file)
|
135 |
|
136 |
|
137 |
-
def main_process(ref_audio, ref_text, text_gen, model_obj, remove_silence):
|
138 |
main_voice = {"ref_audio": ref_audio, "ref_text": ref_text}
|
139 |
if "voices" not in config:
|
140 |
voices = {"main": main_voice}
|
@@ -168,7 +175,7 @@ def main_process(ref_audio, ref_text, text_gen, model_obj, remove_silence):
|
|
168 |
ref_audio = voices[voice]["ref_audio"]
|
169 |
ref_text = voices[voice]["ref_text"]
|
170 |
print(f"Voice: {voice}")
|
171 |
-
audio, final_sample_rate, spectragram = infer_process(ref_audio, ref_text, gen_text, model_obj)
|
172 |
generated_audio_segments.append(audio)
|
173 |
|
174 |
if generated_audio_segments:
|
@@ -186,7 +193,7 @@ def main_process(ref_audio, ref_text, text_gen, model_obj, remove_silence):
|
|
186 |
|
187 |
|
188 |
def main():
|
189 |
-
main_process(ref_audio, ref_text, gen_text, ema_model, remove_silence)
|
190 |
|
191 |
|
192 |
if __name__ == "__main__":
|
|
|
75 |
action="store_true",
|
76 |
help="load vocoder from local. Default: ../checkpoints/charactr/vocos-mel-24khz",
|
77 |
)
|
78 |
+
parser.add_argument(
|
79 |
+
"--speed",
|
80 |
+
type=float,
|
81 |
+
default=1.0,
|
82 |
+
help="Adjust the speed of the audio generation (default: 1.0)",
|
83 |
+
)
|
84 |
args = parser.parse_args()
|
85 |
|
86 |
config = tomli.load(open(args.config, "rb"))
|
|
|
108 |
ckpt_file = args.ckpt_file if args.ckpt_file else ""
|
109 |
vocab_file = args.vocab_file if args.vocab_file else ""
|
110 |
remove_silence = args.remove_silence if args.remove_silence else config["remove_silence"]
|
111 |
+
speed = args.speed
|
112 |
wave_path = Path(output_dir) / "infer_cli_out.wav"
|
113 |
# spectrogram_path = Path(output_dir) / "infer_cli_out.png"
|
114 |
vocos_local_path = "../checkpoints/charactr/vocos-mel-24khz"
|
|
|
141 |
ema_model = load_model(model_cls, model_cfg, ckpt_file, vocab_file)
|
142 |
|
143 |
|
144 |
+
def main_process(ref_audio, ref_text, text_gen, model_obj, remove_silence, speed):
|
145 |
main_voice = {"ref_audio": ref_audio, "ref_text": ref_text}
|
146 |
if "voices" not in config:
|
147 |
voices = {"main": main_voice}
|
|
|
175 |
ref_audio = voices[voice]["ref_audio"]
|
176 |
ref_text = voices[voice]["ref_text"]
|
177 |
print(f"Voice: {voice}")
|
178 |
+
audio, final_sample_rate, spectragram = infer_process(ref_audio, ref_text, gen_text, model_obj, speed=speed)
|
179 |
generated_audio_segments.append(audio)
|
180 |
|
181 |
if generated_audio_segments:
|
|
|
193 |
|
194 |
|
195 |
def main():
|
196 |
+
main_process(ref_audio, ref_text, gen_text, ema_model, remove_silence, speed)
|
197 |
|
198 |
|
199 |
if __name__ == "__main__":
|