Spaces:
Runtime error
Runtime error
Add FF and refactor a little
Browse files
- app.py +19 -15
- requirements.txt +0 -1
- weights/FF/FF.index +3 -0
- weights/FF/FF_e300.pth +3 -0
app.py
CHANGED
@@ -31,18 +31,15 @@ limitation = os.getenv("SYSTEM") == "spaces"
|
|
31 |
|
32 |
config = Config()
|
33 |
|
|
|
34 |
edge_output_filename = "edge_output.mp3"
|
35 |
tts_voice_list = asyncio.get_event_loop().run_until_complete(edge_tts.list_voices())
|
36 |
tts_voices = [f"{v['ShortName']}-{v['Gender']}" for v in tts_voice_list]
|
37 |
|
|
|
38 |
model_root = "weights"
|
39 |
models = [d for d in os.listdir(model_root) if os.path.isdir(f"{model_root}/{d}")]
|
40 |
models.sort()
|
41 |
-
hubert_model = None
|
42 |
-
|
43 |
-
print("Loading rmvpe model...")
|
44 |
-
rmvpe_model = RMVPE("rmvpe.pt", config.is_half, config.device)
|
45 |
-
print("rmvpe model loaded.")
|
46 |
|
47 |
|
48 |
def model_data(model_name):
|
@@ -97,7 +94,7 @@ def model_data(model_name):
|
|
97 |
|
98 |
|
99 |
def load_hubert():
|
100 |
-
global hubert_model
|
101 |
models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
|
102 |
["hubert_base.pt"],
|
103 |
suffix="",
|
@@ -108,7 +105,7 @@ def load_hubert():
|
|
108 |
hubert_model = hubert_model.half()
|
109 |
else:
|
110 |
hubert_model = hubert_model.float()
|
111 |
-
hubert_model.eval()
|
112 |
|
113 |
|
114 |
def tts(
|
@@ -128,7 +125,7 @@ def tts(
|
|
128 |
print(datetime.datetime.now())
|
129 |
print("tts_text:")
|
130 |
print(tts_text)
|
131 |
-
print(f"tts_voice: {tts_voice}")
|
132 |
print(f"Model name: {model_name}")
|
133 |
print(f"F0: {f0_method}, Key: {f0_up_key}, Index: {index_rate}, Protect: {protect}")
|
134 |
try:
|
@@ -139,7 +136,6 @@ def tts(
|
|
139 |
None,
|
140 |
None,
|
141 |
)
|
142 |
-
tgt_sr, net_g, vc, version, index_file, if_f0 = model_data(model_name)
|
143 |
t0 = time.time()
|
144 |
if speed >= 0:
|
145 |
speed_str = f"+{speed}%"
|
@@ -162,11 +158,9 @@ def tts(
|
|
162 |
edge_output_filename,
|
163 |
None,
|
164 |
)
|
165 |
-
|
166 |
f0_up_key = int(f0_up_key)
|
167 |
|
168 |
-
|
169 |
-
load_hubert()
|
170 |
if f0_method == "rmvpe":
|
171 |
vc.model_rmvpe = rmvpe_model
|
172 |
times = [0, 0, 0]
|
@@ -201,9 +195,11 @@ def tts(
|
|
201 |
(tgt_sr, audio_opt),
|
202 |
)
|
203 |
except EOFError:
|
204 |
-
info =
|
205 |
-
It seems that edge-tts output is
|
206 |
-
|
|
|
|
|
207 |
print(info)
|
208 |
return info, None, None
|
209 |
except:
|
@@ -212,6 +208,14 @@ For example, maybe you entered Japanese (without alphabets) text but chose non-J
|
|
212 |
return info, None, None
|
213 |
|
214 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
215 |
initial_md = """
|
216 |
# RVC text-to-speech demo
|
217 |
|
|
|
31 |
|
32 |
config = Config()
|
33 |
|
34 |
+
# Edge TTS
|
35 |
edge_output_filename = "edge_output.mp3"
|
36 |
tts_voice_list = asyncio.get_event_loop().run_until_complete(edge_tts.list_voices())
|
37 |
tts_voices = [f"{v['ShortName']}-{v['Gender']}" for v in tts_voice_list]
|
38 |
|
39 |
+
# RVC models
|
40 |
model_root = "weights"
|
41 |
models = [d for d in os.listdir(model_root) if os.path.isdir(f"{model_root}/{d}")]
|
42 |
models.sort()
|
|
|
|
|
|
|
|
|
|
|
43 |
|
44 |
|
45 |
def model_data(model_name):
|
|
|
94 |
|
95 |
|
96 |
def load_hubert():
|
97 |
+
# global hubert_model
|
98 |
models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
|
99 |
["hubert_base.pt"],
|
100 |
suffix="",
|
|
|
105 |
hubert_model = hubert_model.half()
|
106 |
else:
|
107 |
hubert_model = hubert_model.float()
|
108 |
+
return hubert_model.eval()
|
109 |
|
110 |
|
111 |
def tts(
|
|
|
125 |
print(datetime.datetime.now())
|
126 |
print("tts_text:")
|
127 |
print(tts_text)
|
128 |
+
print(f"tts_voice: {tts_voice}, speed: {speed}")
|
129 |
print(f"Model name: {model_name}")
|
130 |
print(f"F0: {f0_method}, Key: {f0_up_key}, Index: {index_rate}, Protect: {protect}")
|
131 |
try:
|
|
|
136 |
None,
|
137 |
None,
|
138 |
)
|
|
|
139 |
t0 = time.time()
|
140 |
if speed >= 0:
|
141 |
speed_str = f"+{speed}%"
|
|
|
158 |
edge_output_filename,
|
159 |
None,
|
160 |
)
|
|
|
161 |
f0_up_key = int(f0_up_key)
|
162 |
|
163 |
+
tgt_sr, net_g, vc, version, index_file, if_f0 = model_data(model_name)
|
|
|
164 |
if f0_method == "rmvpe":
|
165 |
vc.model_rmvpe = rmvpe_model
|
166 |
times = [0, 0, 0]
|
|
|
195 |
(tgt_sr, audio_opt),
|
196 |
)
|
197 |
except EOFError:
|
198 |
+
info = (
|
199 |
+
"It seems that the edge-tts output is not valid. "
|
200 |
+
"This may occur when the input text and the speaker do not match. "
|
201 |
+
"For example, maybe you entered Japanese (without alphabets) text but chose non-Japanese speaker?"
|
202 |
+
)
|
203 |
print(info)
|
204 |
return info, None, None
|
205 |
except:
|
|
|
208 |
return info, None, None
|
209 |
|
210 |
|
211 |
+
print("Loading hubert model...")
|
212 |
+
hubert_model = load_hubert()
|
213 |
+
print("Hubert model loaded.")
|
214 |
+
|
215 |
+
print("Loading rmvpe model...")
|
216 |
+
rmvpe_model = RMVPE("rmvpe.pt", config.is_half, config.device)
|
217 |
+
print("rmvpe model loaded.")
|
218 |
+
|
219 |
initial_md = """
|
220 |
# RVC text-to-speech demo
|
221 |
|
requirements.txt
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
# Cython==0.29.34
|
2 |
edge_tts==6.1.7
|
3 |
fairseq==0.12.2
|
4 |
faiss_cpu==1.7.4
|
|
|
|
|
1 |
edge_tts==6.1.7
|
2 |
fairseq==0.12.2
|
3 |
faiss_cpu==1.7.4
|
weights/FF/FF.index
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:586dd540bc384163e2107df8b48c2a1d21cc1e89b5eef1c050d0dc12544ebd24
|
3 |
+
size 508489659
|
weights/FF/FF_e300.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3f1f037b3c249418806317a14dd12d5fcabef908a52bc2f1ba2c83ca34569d49
|
3 |
+
size 55232064
|