Spaces:
Running
Running
File size: 10,902 Bytes
c5ed230 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 |
import re
import requests
import os
import random
import string
from requests_toolbelt.multipart.encoder import MultipartEncoder
# Directory containing this script; the request helpers below write the
# files returned by the server into this directory.
abs_path = os.path.dirname(__file__)
# Base URL of the API server that every request helper below targets.
base = "http://127.0.0.1:23456"
# Mapping table (speaker list)
def voice_speakers():
    """Fetch the speaker mapping from the server, print it and return it.

    Sends a POST request to ``{base}/voice/speakers`` and decodes the JSON
    body. Each top-level key is printed, followed by every element stored
    under that key.

    Returns:
        The decoded JSON object exactly as returned by the server.
    """
    response = requests.post(url=f"{base}/voice/speakers")
    speakers = response.json()
    for group in speakers:
        print(group)
        for entry in speakers[group]:
            print(entry)
    return speakers
# Speech synthesis (voice vits)
def voice_vits(text, id=0, format="wav", lang="auto", length=1, noise=0.667, noisew=0.8, max=50):
    """Request speech synthesis for ``text`` and save the returned file.

    Every argument is sent as a multipart form field to ``{base}/voice``;
    non-string values are converted with ``str``. The parameter names
    ``id``, ``format`` and ``max`` shadow builtins but are kept so existing
    keyword callers keep working.

    Args:
        text: Text to synthesize (form field ``text``).
        id: Form field ``id`` (presumably the speaker id; confirm against
            the server).
        format: Form field ``format``.
        lang: Form field ``lang``.
        length: Form field ``length``.
        noise: Form field ``noise``.
        noisew: Form field ``noisew``.
        max: Form field ``max``.

    Returns:
        Path of the saved file inside ``abs_path``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        KeyError: If the response has no ``Content-Disposition`` header.
        IndexError: If that header carries no ``filename`` parameter.
    """
    fields = {
        "text": text,
        "id": str(id),
        "format": format,
        "lang": lang,
        "length": str(length),
        "noise": str(noise),
        "noisew": str(noisew),
        "max": str(max),
    }
    boundary = '----VoiceConversionFormBoundary' + ''.join(random.sample(string.ascii_letters + string.digits, 16))

    m = MultipartEncoder(fields=fields, boundary=boundary)
    headers = {"Content-Type": m.content_type}
    url = f"{base}/voice"

    res = requests.post(url=url, data=m, headers=headers)
    # Surface server-side failures as an HTTP error instead of a confusing
    # KeyError on the missing Content-Disposition header below.
    res.raise_for_status()

    # Accept both quoted and bare filenames and ignore any parameters that
    # follow (e.g. "; filename*=...").
    fname = re.findall(r'filename="?([^";]+)', res.headers["Content-Disposition"])[0]
    # Keep only the final path component so a server-supplied name cannot
    # redirect the write outside abs_path.
    fname = os.path.basename(fname.strip())
    path = f"{abs_path}/{fname}"

    with open(path, "wb") as f:
        f.write(res.content)
    print(path)
    return path
# Voice conversion (hubert-vits)
def voice_hubert_vits(upload_path, id, format="wav", length=1, noise=0.667, noisew=0.8):
    """Upload an audio file to the hubert-vits endpoint and save the result.

    The file at ``upload_path`` is streamed as the ``upload`` multipart
    field to ``{base}/voice/hubert-vits`` together with the other
    arguments, which are converted with ``str`` where they are not already
    strings.

    Args:
        upload_path: Path of the local audio file to upload (e.g. wav, ogg).
        id: Form field ``id`` (presumably the target speaker id; confirm
            against the server).
        format: Form field ``format``.
        length: Form field ``length``.
        noise: Form field ``noise``.
        noisew: Form field ``noisew``.

    Returns:
        Path of the saved file inside ``abs_path``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        KeyError: If the response has no ``Content-Disposition`` header.
        IndexError: If that header carries no ``filename`` parameter.
    """
    upload_name = os.path.basename(upload_path)
    # Use the real extension: splitting on "." and taking index 1 picked the
    # wrong segment for names like "a.b.wav" and crashed on names without a dot.
    ext = os.path.splitext(upload_name)[1].lstrip(".")
    upload_type = f"audio/{ext}" if ext else "application/octet-stream"

    # The encoder reads the file lazily while the request is being sent, so
    # the POST has to happen while the file is still open.
    with open(upload_path, 'rb') as upload_file:
        fields = {
            "upload": (upload_name, upload_file, upload_type),
            "id": str(id),
            "format": format,
            "length": str(length),
            "noise": str(noise),
            "noisew": str(noisew),
        }
        boundary = '----VoiceConversionFormBoundary' + ''.join(random.sample(string.ascii_letters + string.digits, 16))

        m = MultipartEncoder(fields=fields, boundary=boundary)
        headers = {"Content-Type": m.content_type}
        url = f"{base}/voice/hubert-vits"

        res = requests.post(url=url, data=m, headers=headers)

    # Surface server-side failures as an HTTP error instead of a confusing
    # KeyError on the missing Content-Disposition header below.
    res.raise_for_status()

    # Accept both quoted and bare filenames and ignore trailing parameters.
    fname = re.findall(r'filename="?([^";]+)', res.headers["Content-Disposition"])[0]
    # Keep only the final path component so a server-supplied name cannot
    # redirect the write outside abs_path.
    fname = os.path.basename(fname.strip())
    path = f"{abs_path}/{fname}"

    with open(path, "wb") as f:
        f.write(res.content)
    print(path)
    return path
# Dimensional emotion model (w2v2-vits)
def voice_w2v2_vits(text, id=0, format="wav", lang="auto", length=1, noise=0.667, noisew=0.8, max=50, emotion=0):
    """Request w2v2-vits synthesis for ``text`` and save the returned file.

    Every argument is sent as a multipart form field to
    ``{base}/voice/w2v2-vits``; non-string values are converted with
    ``str``. The parameter names ``id``, ``format`` and ``max`` shadow
    builtins but are kept so existing keyword callers keep working.

    Args:
        text: Text to synthesize (form field ``text``).
        id: Form field ``id`` (presumably the speaker id; confirm against
            the server).
        format: Form field ``format``.
        lang: Form field ``lang``.
        length: Form field ``length``.
        noise: Form field ``noise``.
        noisew: Form field ``noisew``.
        max: Form field ``max``.
        emotion: Form field ``emotion``.

    Returns:
        Path of the saved file inside ``abs_path``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        KeyError: If the response has no ``Content-Disposition`` header.
        IndexError: If that header carries no ``filename`` parameter.
    """
    fields = {
        "text": text,
        "id": str(id),
        "format": format,
        "lang": lang,
        "length": str(length),
        "noise": str(noise),
        "noisew": str(noisew),
        "max": str(max),
        "emotion": str(emotion),
    }
    boundary = '----VoiceConversionFormBoundary' + ''.join(random.sample(string.ascii_letters + string.digits, 16))

    m = MultipartEncoder(fields=fields, boundary=boundary)
    headers = {"Content-Type": m.content_type}
    url = f"{base}/voice/w2v2-vits"

    res = requests.post(url=url, data=m, headers=headers)
    # Surface server-side failures as an HTTP error instead of a confusing
    # KeyError on the missing Content-Disposition header below.
    res.raise_for_status()

    # Accept both quoted and bare filenames and ignore trailing parameters.
    fname = re.findall(r'filename="?([^";]+)', res.headers["Content-Disposition"])[0]
    # Keep only the final path component so a server-supplied name cannot
    # redirect the write outside abs_path.
    fname = os.path.basename(fname.strip())
    path = f"{abs_path}/{fname}"

    with open(path, "wb") as f:
        f.write(res.content)
    print(path)
    return path
# Voice conversion: timbre conversion between speakers of the same VITS model
def voice_conversion(upload_path, original_id, target_id):
    """Upload an audio file for speaker-to-speaker conversion and save the result.

    The file at ``upload_path`` is streamed as the ``upload`` multipart
    field to ``{base}/voice/conversion`` along with both ids converted
    with ``str``.

    Args:
        upload_path: Path of the local audio file to upload (e.g. wav, ogg).
        original_id: Form field ``original_id``.
        target_id: Form field ``target_id``.

    Returns:
        Path of the saved file inside ``abs_path``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        KeyError: If the response has no ``Content-Disposition`` header.
        IndexError: If that header carries no ``filename`` parameter.
    """
    upload_name = os.path.basename(upload_path)
    # Use the real extension: splitting on "." and taking index 1 picked the
    # wrong segment for names like "a.b.wav" and crashed on names without a dot.
    ext = os.path.splitext(upload_name)[1].lstrip(".")
    upload_type = f"audio/{ext}" if ext else "application/octet-stream"

    # The encoder reads the file lazily while the request is being sent, so
    # the POST has to happen while the file is still open.
    with open(upload_path, 'rb') as upload_file:
        fields = {
            "upload": (upload_name, upload_file, upload_type),
            "original_id": str(original_id),
            "target_id": str(target_id),
        }
        boundary = '----VoiceConversionFormBoundary' + ''.join(random.sample(string.ascii_letters + string.digits, 16))

        m = MultipartEncoder(fields=fields, boundary=boundary)
        headers = {"Content-Type": m.content_type}
        url = f"{base}/voice/conversion"

        res = requests.post(url=url, data=m, headers=headers)

    # Surface server-side failures as an HTTP error instead of a confusing
    # KeyError on the missing Content-Disposition header below.
    res.raise_for_status()

    # Accept both quoted and bare filenames and ignore trailing parameters.
    fname = re.findall(r'filename="?([^";]+)', res.headers["Content-Disposition"])[0]
    # Keep only the final path component so a server-supplied name cannot
    # redirect the write outside abs_path.
    fname = os.path.basename(fname.strip())
    path = f"{abs_path}/{fname}"

    with open(path, "wb") as f:
        f.write(res.content)
    print(path)
    return path
def voice_ssml(ssml):
    """Send an SSML document to the server and save the returned file.

    The document is sent as the single multipart form field ``ssml`` to
    ``{base}/voice/ssml``.

    Args:
        ssml: SSML markup as a string.

    Returns:
        Path of the saved file inside ``abs_path``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        KeyError: If the response has no ``Content-Disposition`` header.
        IndexError: If that header carries no ``filename`` parameter.
    """
    fields = {
        "ssml": ssml,
    }
    boundary = '----VoiceConversionFormBoundary' + ''.join(random.sample(string.ascii_letters + string.digits, 16))

    m = MultipartEncoder(fields=fields, boundary=boundary)
    headers = {"Content-Type": m.content_type}
    url = f"{base}/voice/ssml"

    res = requests.post(url=url, data=m, headers=headers)
    # Surface server-side failures as an HTTP error instead of a confusing
    # KeyError on the missing Content-Disposition header below.
    res.raise_for_status()

    # Accept both quoted and bare filenames and ignore trailing parameters.
    fname = re.findall(r'filename="?([^";]+)', res.headers["Content-Disposition"])[0]
    # Keep only the final path component so a server-supplied name cannot
    # redirect the write outside abs_path.
    fname = os.path.basename(fname.strip())
    path = f"{abs_path}/{fname}"

    with open(path, "wb") as f:
        f.write(res.content)
    print(path)
    return path
def voice_dimensional_emotion(upload_path):
    """Upload an audio file to the dimension-emotion endpoint and save the result.

    The file at ``upload_path`` is streamed as the ``upload`` multipart
    field to ``{base}/voice/dimension-emotion``.

    Args:
        upload_path: Path of the local audio file to upload (e.g. wav, ogg).

    Returns:
        Path of the saved file inside ``abs_path``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        KeyError: If the response has no ``Content-Disposition`` header.
        IndexError: If that header carries no ``filename`` parameter.
    """
    upload_name = os.path.basename(upload_path)
    # Use the real extension: splitting on "." and taking index 1 picked the
    # wrong segment for names like "a.b.wav" and crashed on names without a dot.
    ext = os.path.splitext(upload_name)[1].lstrip(".")
    upload_type = f"audio/{ext}" if ext else "application/octet-stream"

    # The encoder reads the file lazily while the request is being sent, so
    # the POST has to happen while the file is still open.
    with open(upload_path, 'rb') as upload_file:
        fields = {
            "upload": (upload_name, upload_file, upload_type),
        }
        boundary = '----VoiceConversionFormBoundary' + ''.join(random.sample(string.ascii_letters + string.digits, 16))

        m = MultipartEncoder(fields=fields, boundary=boundary)
        headers = {"Content-Type": m.content_type}
        url = f"{base}/voice/dimension-emotion"

        res = requests.post(url=url, data=m, headers=headers)

    # Surface server-side failures as an HTTP error instead of a confusing
    # KeyError on the missing Content-Disposition header below.
    res.raise_for_status()

    # Accept both quoted and bare filenames and ignore trailing parameters.
    fname = re.findall(r'filename="?([^";]+)', res.headers["Content-Disposition"])[0]
    # Keep only the final path component so a server-supplied name cannot
    # redirect the write outside abs_path.
    fname = os.path.basename(fname.strip())
    path = f"{abs_path}/{fname}"

    with open(path, "wb") as f:
        f.write(res.content)
    print(path)
    return path
import time
# while 1:
# text = input()
# l = len(text)
# time1 = time.time()
# voice_vits(text)
# time2 = time.time()
# print(f"len:{l}耗时:{time2 - time1}")
# text = "你好"
# ssml = """
# <speak lang="zh" format="mp3" length="1.2">
# <voice id="92" >这几天心里颇不宁静。</voice>
# <voice id="125">今晚在院子里坐着乘凉,忽然想起日日走过的荷塘,在这满月的光里,总该另有一番样子吧。</voice>
# <voice id="142">月亮渐渐地升高了,墙外马路上孩子们的欢笑,已经听不见了;</voice>
# <voice id="98">妻在屋里拍着闰儿,迷迷糊糊地哼着眠歌。</voice>
# <voice id="120">我悄悄地披了大衫,带上门出去。</voice><break time="2s"/>
# <voice id="121">沿着荷塘,是一条曲折的小煤屑路。</voice>
# <voice id="122">这是一条幽僻的路;白天也少人走,夜晚更加寂寞。</voice>
# <voice id="123">荷塘四面,长着许多树,蓊蓊郁郁的。</voice>
# <voice id="124">路的一旁,是些杨柳,和一些不知道名字的树。</voice>
# <voice id="125">没有月光的晚上,这路上阴森森的,有些怕人。</voice>
# <voice id="126">今晚却很好,虽然月光也还是淡淡的。</voice><break time="2s"/>
# <voice id="127">路上只我一个人,背着手踱着。</voice>
# <voice id="128">这一片天地好像是我的;我也像超出了平常的自己,到了另一个世界里。</voice>
# <voice id="129">我爱热闹,也爱冷静;<break strength="x-weak"/>爱群居,也爱独处。</voice>
# <voice id="130">像今晚上,一个人在这苍茫的月下,什么都可以想,什么都可以不想,便觉是个自由的人。</voice>
# <voice id="131">白天里一定要做的事,一定要说的话,现在都可不理。</voice>
# <voice id="132">这是独处的妙处,我且受用这无边的荷香月色好了。</voice>
# </speak>
# """
# ssml = """
# <speak lang="zh">
# <voice id="92" length="1.4">这几天心里颇不宁静。今晚<break/>在院子里坐着乘凉,忽然想起<break/>日日走过的荷塘,在这满月的光里,总该另有一番样子吧。</voice>
# <voice id="142" length="1.4">月亮渐渐地升高了,墙外马路上孩子们的欢笑,已经听不见了;</voice><break time="2s"/>
# <voice id="0" length="1.4" model="w2v2-vits" lang="ja">こんにちは</voice>
# </speak>
# """
# ssml = """
# <speak lang="ja">
# <voice id="142" length="1.4">こんにちは</voice>
# <voice id="0" length="1.4" model="w2v2-vits" emotion="177">こんにちは</voice>
# <voice id="0" length="1.4" model="w2v2-vits">こんにちは</voice>
# </speak>
# """
# Sample SSML document for voice_ssml(); the call that uses it is currently
# commented out below. The <voice> elements here carry no attributes.
ssml = """
<speak lang="auto">
<voice>这几天心里颇不宁静。</voice>
<voice>今晚在院子里坐着乘凉,忽然想起日日走过的荷塘,在这满月的光里,总该另有一番样子吧。</voice>
<voice>月亮渐渐地升高了,墙外马路上孩子们的欢笑,已经听不见了;</voice>
<voice>妻在屋里拍着闰儿,迷迷糊糊地哼着眠歌。</voice>
<voice>我悄悄地披了大衫,带上门出去。</voice><break time="2s"/>
<voice>沿着荷塘,是一条曲折的小煤屑路。</voice>
<voice>这是一条幽僻的路;白天也少人走,夜晚更加寂寞。</voice>
<voice>荷塘四面,长着许多树,蓊蓊郁郁的。</voice>
<voice>路的一旁,是些杨柳,和一些不知道名字的树。</voice>
<voice>没有月光的晚上,这路上阴森森的,有些怕人。</voice>
<voice>今晚却很好,虽然月光也还是淡淡的。</voice><break time="2s"/>
<voice>路上只我一个人,背着手踱着。</voice>
<voice>这一片天地好像是我的;我也像超出了平常的自己,到了另一个世界里。</voice>
<voice>我爱热闹,也爱冷静;<break strength="x-weak"/>爱群居,也爱独处。</voice>
<voice>像今晚上,一个人在这苍茫的月下,什么都可以想,什么都可以不想,便觉是个自由的人。</voice>
<voice>白天里一定要做的事,一定要说的话,现在都可不理。</voice>
<voice>这是独处的妙处,我且受用这无边的荷香月色好了。</voice>
</speak>
"""
# Sample plain-text input used by the enabled voice_vits() call below.
text = """猫咪是爱撒娇、爱玩耍的小家伙,通常有着柔软的绒毛和温柔的眼神,是许多人都喜欢的宠物哦~它们特别喜欢舔自己的毛发,用柔顺的小脑袋搓人的脚丫子,还能给人带来很多欢乐和温馨。
"""
# Time one end-to-end request. The commented-out lines are alternative calls
# that can be enabled instead of the active one.
t1 = time.time()
# voice_conversion("H:/git/vits-simple-api/25ecb3f6-f968-11ed-b094-e0d4e84af078.wav", 91, 93)
# voice_hubert_vits("H:/git/vits-simple-api/25ecb3f6-f968-11ed-b094-e0d4e84af078.wav",0)
# voice_vits(text,format="wav",lang="zh")
# voice_w2v2_vits(text,emotion=111)
# os.system(voice_ssml(ssml))
# voice_vits() returns the path of the saved file; os.system() then passes
# that path to the shell as a command.
# NOTE(review): presumably this relies on the shell opening the file with its
# associated application (Windows behaviour) - confirm. A path containing
# spaces would be split by the shell.
os.system(voice_vits(text,id=0, format="wav", max=0))
# voice_dimensional_emotion("H:/git/vits-simple-api/25ecb3f6-f968-11ed-b094-e0d4e84af078.wav")
t2 = time.time()
# Print the character count of `text` and the elapsed seconds
# (the label in the output string means "time taken").
print(f"len:{len(text)}耗时:{t2 - t1}")
|