lintonxue00
/

lora

Model card Files Files and versions Community

lora / 不知道 /回收站 /1 /ext_VOICEVOX.py

lintonxue00

Upload ext_VOICEVOX.py

b2b3dca over 1 year ago

raw

history blame

No virus

11.6 kB

	'''
	To enable Tencent translation, add the following parameters to the YAML config:
	ng_voice_translate_on : True
	tencentcloud_common_region : "ap-shanghai"
	tencentcloud_common_secretid : "xxxxx"
	tencentcloud_common_secretkey : "xxxxx"
	ng_voice_tar : 'ja'
	'''

	from .Extension import Extension
	import urllib
	import requests
	import uuid
	import os
	import base64
	from aiohttp import request
	from binascii import b2a_base64
	from hashlib import sha1
	from urllib.parse import urlencode
	from hmac import new
	import random
	from sys import maxsize, version_info
	from time import time
	from nonebot import get_driver
	from aiohttp import request
	from loguru import logger
	from nonebot.exception import ActionFailed
	import asyncio


	# Prefer ujson when it is installed; otherwise fall back to the standard
	# library. Only a failed import should trigger the fallback, so catch
	# ImportError rather than everything.
	try:
	    from ujson import loads as loadJsonS
	except ImportError:
	    from json import loads as loadJsonS


	# Extension metadata. The AI reads this to understand what the extension
	# does, so every field here is required.
	ext_config: dict = {
	    # Extension name, used to identify the extension.
	    "name": "voice",
	    "arguments": {
	        "sentence": "str",  # text to convert to speech
	        "emotion": "str",  # emotion to speak with
	    },
	    # Description shown to the AI. Keep it short and in English to save
	    # tokens.
	    "description": "Send a voice sentence. The emotional parameter must be one of \"normal,sweet,tsundere,sexy,whisper,murmur\" (usage in response: /#voice&hello&sweet#/) ",
	    # Reference words used for context matching. When empty, the extension
	    # is referenced on every request (which costs tokens).
	    "refer_word": [],
	    # Author.
	    "author": "恋如雨止",
	    # Version.
	    "version": "0.0.2",
	    # Short introduction of the extension.
	    "intro": "发送语音消息(支持翻译)",
	}

	# Emotion parameter table: maps each English emotion key to the Japanese
	# style name and the audio-query attributes applied for that emotion.
	# Table columns:
	#   key, style name, speed, volume, intonation, pre-phoneme, post-phoneme
	emotion_rate_dict = {
	    key: {
	        'custom_attributes': {
	            "speed_scale": speed,
	            "volume_scale": volume,
	            "intonation_scale": intonation,
	            "pre_phoneme_length": pre_length,
	            "post_phoneme_length": post_length,
	        },
	        'name': style_name,
	    }
	    for key, style_name, speed, volume, intonation, pre_length, post_length in (
	        ('normal', 'ノーマル', 1, 1, 1, 0.1, 0.1),
	        ('sweet', 'あまあま', 1.1, 0.9, 1.3, 0.2, 0.2),
	        ('tsundere', 'ツンツン', 1.0, 1.1, 1.2, 0.3, 0.3),
	        ('sexy', 'セクシー', 0.9, 1.2, 1.1, 0.4, 0.4),
	        ('whisper', 'ささやき', 0.8, 1.3, 1.0, 0.5, 0.5),
	        ('murmur', 'ヒソヒソ', 0.7, 1.4, 0.9, 0.6, 0.6),
	    )
	}

	# Emotion name translation tables: Japanese style name -> English emotion
	# key, and the inverse mapping derived from it.
	emotion_translate_jp2en = {
	    "ノーマル": "normal",
	    "あまあま": "sweet",
	    "ツンツン": "tsundere",
	    "セクシー": "sexy",
	    "ささやき": "whisper",
	    "ヒソヒソ": "murmur",
	}
	emotion_translate_en2jp = dict(
	    zip(emotion_translate_jp2en.values(), emotion_translate_jp2en.keys())
	)

	class CustomExtension(Extension):
	    """Extension that speaks a sentence through a VOICEVOX engine.

	    The sentence can optionally be translated with Tencent Cloud machine
	    translation before synthesis. The generated audio is written to a local
	    cache directory and handed back to the host as a ``file://`` URL.
	    """

	    # Directory (relative to the working directory) for generated audio.
	    _VOICE_CACHE_DIR = 'voice_cache/'
	    # Endpoint of the Tencent Cloud machine translation API.
	    _TENCENT_ENDPOINT = "https://tmt.tencentcloudapi.com"
	    # Upper bound, in seconds, for one whole translation round trip.
	    _TRANSLATE_TIMEOUT = 30
	    # Timeouts, in seconds, for the two VOICEVOX requests.
	    _QUERY_TIMEOUT = 30
	    _SYNTHESIS_TIMEOUT = 120

	    def __init__(self, custom_config: dict):
	        """Load the speaker list from the configured VOICEVOX engine.

	        Args:
	            custom_config: Extension settings from the YAML config. Reads
	                ``api_url`` (default ``127.0.0.1:50021``).

	        Raises:
	            Exception: If no server URL is configured, or the speaker list
	                could not be fetched after three attempts.
	        """
	        super().__init__(ext_config.copy(), custom_config)

	        url = self._normalize_url(custom_config.get('api_url', '127.0.0.1:50021'))
	        if not url:
	            raise Exception("未配置语音服务器url")

	        # Fetch the available speakers, retrying on transport errors and on
	        # HTTP error statuses (raise_for_status turns those into
	        # RequestException subclasses).
	        for _ in range(3):
	            try:
	                res = requests.get(url + "speakers", timeout=10)
	                res.raise_for_status()
	                break
	            except requests.exceptions.RequestException:
	                continue
	        else:
	            raise Exception("获取语音服务器角色列表失败")
	        speaker_json = res.json()

	        # {character name: {style name: {"speaker": style id, "name": style name}}}
	        self.character_emotion_dict = {
	            character["name"]: {
	                style["name"]: {
	                    "speaker": style["id"],
	                    "name": style["name"],
	                }
	                for style in character["styles"]
	            }
	            for character in speaker_json
	        }

	        logger.info(f"[ext_VOICEVOX] 共加载了 {len(self.character_emotion_dict)} 个角色")

	    @staticmethod
	    def _normalize_url(url):
	        """Return ``url`` with an ``http://`` scheme and a trailing slash.

	        An empty or missing value is returned as an empty string so callers
	        can report the missing configuration themselves.
	        """
	        if not url:
	            return ''
	        if not url.startswith('http'):
	            url = f'http://{url}'
	        if not url.endswith('/'):
	            url = f'{url}/'
	        return url

	    @staticmethod
	    def _sign_tencent_request(params: dict, region, secret_id, secret_key) -> str:
	        """Add the common Tencent Cloud fields to ``params`` and sign them.

	        ``params`` is updated in place because the common fields must be sent
	        together with the signature. The signature is the base64-encoded
	        HMAC-SHA1 of the sorted ``key=value`` pairs, prefixed with the HTTP
	        method, host and path.
	        """
	        params.update({
	            "Action": "TextTranslate",
	            "Region": str(region),
	            "Timestamp": int(time()),
	            "Nonce": random.randint(1, maxsize),
	            "SecretId": str(secret_id),
	            "Version": "2018-03-21",
	        })
	        sign_str = "POSTtmt.tencentcloudapi.com/?" + "&".join(
	            f"{key}={params[key]}" for key in sorted(params)
	        )
	        digest = new(
	            str(secret_key).encode("utf-8"), sign_str.encode("utf-8"), sha1
	        ).digest()
	        # b2a_base64 appends a newline; strip it before decoding.
	        return b2a_base64(digest)[:-1].decode()

	    async def _translate(self, message: str, custom_config: dict) -> str:
	        """Translate ``message`` with Tencent Cloud; return it unchanged on failure."""
	        # aiohttp is already imported by this module; only its exception base
	        # class is additionally needed here.
	        from aiohttp import ClientError

	        # ``ng_voice_tar`` is the documented key. ``g_voice_tar`` is what
	        # earlier versions actually read, so it is kept as a fallback.
	        target = custom_config.get(
	            'ng_voice_tar', custom_config.get('g_voice_tar', 'ja'))

	        params = {
	            "Source": "auto",
	            "SourceText": message,
	            "Target": target,
	            "ProjectId": 0,
	        }
	        params["Signature"] = self._sign_tencent_request(
	            params,
	            custom_config.get('tencentcloud_common_region', "ap-shanghai"),
	            custom_config.get('tencentcloud_common_secretid', "xxxxx"),
	            custom_config.get('tencentcloud_common_secretkey', "xxxxx"),
	        )

	        async def fetch():
	            async with request("POST", self._TENCENT_ENDPOINT, data=params) as resp:
	                return loadJsonS(await resp.read())

	        try:
	            # The timeout covers connecting, sending and reading the reply.
	            payload = await asyncio.wait_for(fetch(), timeout=self._TRANSLATE_TIMEOUT)
	        except ActionFailed as e:
	            logger.warning(
	                f"ActionFailed {e.info['retcode']} {e.info['msg'].lower()} {e.info['wording']}"
	            )
	            return message
	        except (asyncio.TimeoutError, TimeoutError) as e:
	            # asyncio.TimeoutError is a distinct class before Python 3.11.
	            logger.warning(
	                f"TimeoutError {e}"
	            )
	            return message
	        except (ClientError, ValueError) as e:
	            # Network failure or a body that is not valid JSON.
	            logger.warning(f"[ext_VOICEVOX] translation request failed: {e!r}")
	            return message

	        response = payload.get("Response", {}) if isinstance(payload, dict) else {}
	        translated = response.get("TargetText")
	        if translated is None:
	            # The reply carried no translated text (e.g. an API error).
	            logger.warning(f"[ext_VOICEVOX] translation failed: {response.get('Error')}")
	            return message
	        return translated

	    def _synthesize(self, url: str, text: str, speaker, custom_attributes: dict) -> bytes:
	        """Run the blocking audio_query + synthesis round trip and return the audio body.

	        Raises:
	            requests.exceptions.RequestException: On connection failure,
	                timeout or an HTTP error status.
	        """
	        query_params = urlencode({
	            "text": text,
	            "speaker": speaker,
	        })
	        query_res = requests.post(
	            url + "audio_query?" + query_params, timeout=self._QUERY_TIMEOUT)
	        query_res.raise_for_status()
	        query_json = query_res.json()

	        # Apply the emotion-specific attributes to the audio query.
	        query_json['speedScale'] = custom_attributes["speed_scale"]
	        query_json['volumeScale'] = custom_attributes["volume_scale"]
	        query_json['intonationScale'] = custom_attributes["intonation_scale"]
	        query_json['prePhonemeLength'] = custom_attributes["pre_phoneme_length"]
	        query_json['postPhonemeLength'] = custom_attributes["post_phoneme_length"]

	        synthesis_params = urlencode({"speaker": speaker})
	        synthesis_res = requests.post(
	            f"{url}synthesis?{synthesis_params}",
	            json=query_json,
	            timeout=self._SYNTHESIS_TIMEOUT,
	        )
	        # Without this check an error body would be saved as a .wav file.
	        synthesis_res.raise_for_status()
	        return synthesis_res.content

	    async def call(self, arg_dict: dict, ctx_data: dict) -> dict:
	        """Synthesize the requested sentence and return it as a voice message.

	        Args:
	            arg_dict: Arguments parsed by the AI, ``{name: value}``. Reads
	                ``sentence`` (text to speak) and ``emotion`` (a key of
	                ``emotion_rate_dict``; unknown values fall back to
	                ``normal``).
	            ctx_data: Context supplied by the host; not used here.

	        Returns:
	            ``{'voice': <file URL>, 'text': <caption>}`` on success,
	            ``{'text': <error message>}`` when the server URL or the
	            configured character is unavailable, or ``{}`` when no sentence
	            was given.

	        Raises:
	            requests.exceptions.RequestException: If the VOICEVOX engine
	                cannot be reached or answers with an HTTP error status.
	        """
	        custom_config: dict = self.get_custom_config()  # settings from the YAML config

	        translate_on = custom_config.get('ng_voice_translate_on', False)
	        # Set when the synthesis response body is base64 text that has to be
	        # decoded before it is saved.
	        is_base64 = custom_config.get('is_base64', False)
	        character = custom_config.get('character', 'もち子さん')

	        url = self._normalize_url(custom_config.get('api_url', '127.0.0.1:50021'))
	        if not url:
	            return {'text': "[ext_VOICEVOX] 未配置语音服务器url"}

	        raw_text = arg_dict.get('sentence', None)
	        if not raw_text:
	            # Nothing to speak.
	            return {}

	        # Validate the emotion against the table it indexes; the per-character
	        # style table is keyed by style name, not by this English key.
	        emotion_key = arg_dict.get('emotion', 'normal')
	        if emotion_key not in emotion_rate_dict:
	            emotion_key = 'normal'

	        styles = self.character_emotion_dict.get(character)
	        if not styles:
	            return {'text': f"[ext_VOICEVOX] 未找到角色: {character}"}

	        # Pick the style matching the emotion, or the character's first style
	        # when it has no such style.
	        style = styles.get(emotion_translate_en2jp.get(emotion_key))
	        if style is None:
	            style = next(iter(styles.values()))
	        speaker = style['speaker']
	        custom_attributes = emotion_rate_dict[emotion_key]['custom_attributes']

	        # Strict comparison on purpose: a non-boolean config value such as the
	        # string "False" must not switch translation on.
	        if translate_on == True:  # noqa: E712
	            spoken_text = await self._translate(raw_text, custom_config)
	        else:
	            spoken_text = raw_text
	        # Append one character so the synthesized audio does not lose its ending.
	        text = spoken_text + '~'

	        # requests is blocking; run it in a worker thread so the event loop
	        # stays responsive during synthesis.
	        loop = asyncio.get_running_loop()
	        audio_data = await loop.run_in_executor(
	            None, self._synthesize, url, text, speaker, custom_attributes)

	        if is_base64:
	            audio_data = base64.b64decode(audio_data)

	        voice_path = self._VOICE_CACHE_DIR
	        os.makedirs(voice_path, exist_ok=True)
	        file_name = f"{voice_path}{uuid.uuid1()}.wav"
	        with open(file_name, "wb") as f:
	            f.write(audio_data)

	        local_url = f"file:///{os.path.abspath(file_name)}"
	        return {
	            'voice': local_url,  # URL of the audio file
	            'text': f"[语音] {raw_text}",  # caption text
	        }