paddle speech
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

166 lines
6.6 KiB

# coding:utf-8
import json
from paddlespeech.cli.tts.infer import TTSExecutor
import numpy as np
from scipy.io import wavfile
from pathlib import Path
import sys
import os
import re
from flask import Flask, request, jsonify
from concurrent.futures import ThreadPoolExecutor
# Pool with a single worker thread; run_initvoice submits the preload build to it.
executor = ThreadPoolExecutor(max_workers=1)
app = Flask(__name__)
# Absolute directory that contains this file (configuration is read from <dir>/conf).
current_file_path = str(Path(__file__).resolve().parent)
# Generated wav files are written to <parent of this file's directory>/data/preload.
save_file_path = os.path.join(str(Path(__file__).resolve().parents[1]), 'data', 'preload')
# One TTS executor instance created at import time and shared by every call to save_voice.
tts = TTSExecutor()
def _error_response(message, status):
    """Return the JSON error envelope (status/message/data) with the given HTTP status."""
    return jsonify({'status': 'error', 'message': message, 'data': None}), status


def _markup_to_ssml(voice_text):
    """Convert the inline pinyin markup into an SSML string.

    Segments are separated by '@'. A segment of the form ``text*pinyin`` becomes
    ``<say-as pinyin='pinyin'>text</say-as>``; any other segment is copied verbatim.
    """
    pieces = ["<speak>"]
    for segment in voice_text.split('@'):
        if '*' in segment:
            # Split once and reuse; only the first two fields are meaningful.
            fields = segment.split('*')
            pieces.append(f"<say-as pinyin='{fields[1]}'>{fields[0]}</say-as>")
        else:
            pieces.append(segment)
    pieces.append('</speak>')
    return "".join(pieces)


def _apply_surname_reading(voice_text, surname_config_path):
    """Replace a leading surname with its configured reading, if any rule matches.

    The configuration file is a JSON list of objects with the keys ``suname``
    (the prefix to match) and ``speak`` (the text substituted for it). The first
    matching rule wins. When the file is absent or no rule matches, the text is
    returned unchanged.

    Raises:
        json.JSONDecodeError: if the configuration file is not valid JSON.
    """
    if not os.path.isfile(surname_config_path):
        return voice_text
    with open(surname_config_path, 'r', encoding='utf-8') as file:
        surname_rules = json.load(file)
    for rule in surname_rules:
        surname = rule['suname']
        if voice_text.startswith(surname):
            # Plain slicing inserts the configured reading literally; a regex
            # substitution would interpret backslashes in it as escapes.
            return "<speak>" + rule['speak'] + voice_text[len(surname):] + '</speak>'
    return voice_text


@app.route('/buildvoice', methods=['POST'])
def buildvoice():
    """Synthesize one wav file from the JSON body of a POST request.

    Expected JSON fields:
        voice_text: text to speak. May contain the ``text*pinyin@text`` markup
            handled by ``_markup_to_ssml``; otherwise the surname rules from
            ``conf/surname.json`` are applied.
        file_name: base name (without extension) of the wav file created under
            ``save_file_path``.

    Returns:
        A ``(json, status)`` pair: 200 on success, 400 for invalid input or a
        malformed surname configuration, 500 when synthesis or writing fails.
    """
    # silent=True yields None instead of aborting on a missing/invalid JSON body;
    # a non-object body (list, string, null) is treated the same as an empty one.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    voice_text = data.get('voice_text')
    file_name = data.get('file_name')
    if not voice_text or not file_name:
        return _error_response('Missing voice_text or file_name', 400)
    if not isinstance(voice_text, str):
        return _error_response('voice_text must be a string', 400)

    # file_name comes from the client and is joined into a filesystem path:
    # refuse anything that could escape the output directory.
    file_name = str(file_name)
    if '/' in file_name or '\\' in file_name or os.path.basename(file_name) != file_name:
        return _error_response('Invalid file_name', 400)

    surname_config_path = os.path.join(current_file_path, 'conf', 'surname.json')
    try:
        if '@' in voice_text:
            audio_data = _markup_to_ssml(voice_text)
        else:
            try:
                audio_data = _apply_surname_reading(voice_text, surname_config_path)
            except json.JSONDecodeError as e:
                return _error_response(f"预加载姓氏读音配置文件格式错误: {str(e)}", 400)
        # The output directory is otherwise only created by initvoice.
        os.makedirs(save_file_path, exist_ok=True)
        save_voice(audio_data, f"{save_file_path}/{file_name}.wav")
    except Exception as e:
        # Endpoint boundary: report synthesis/I/O failures as JSON instead of
        # letting them escape as an unhandled server error.
        app.logger.exception("build voice file error")
        return _error_response(f"build voice file error: {str(e)}", 500)

    return jsonify({
        'status': 'success',
        'message': 'voice file generated successfully'
    }), 200
@app.route('/initvoice', methods=['POST'])
def run_initvoice():
    """Queue the preload voice build on the background executor and answer immediately.

    The response only confirms that the job was submitted; it carries no
    information about whether the build later succeeds.
    """
    executor.submit(initvoice)
    payload = dict(status='success', message='build init voice running.....')
    return jsonify(payload), 200
def initvoice():
    """Generate every preloaded voice file listed in ``conf/preload.json``.

    For each entry of the JSON list, the ``content`` text is synthesized into
    ``<save_file_path>/<name>.wav``. Afterwards the numbers 0..149 are
    synthesized into ``<save_file_path>/<number>.wav``.

    Raises:
        SystemExit: when the configuration file is missing or is not valid JSON.
            NOTE(review): this function is submitted to a ThreadPoolExecutor by
            run_initvoice; there the SystemExit is presumably captured by the
            Future rather than terminating the process - confirm this is intended.
    """
    print("=====build voice file start=====")
    preload_config_path = os.path.join(current_file_path, 'conf', 'preload.json')

    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(save_file_path, exist_ok=True)

    if not os.path.isfile(preload_config_path):
        print("预加载语音配置文件不存在!")
        sys.exit(1)
    try:
        with open(preload_config_path, 'r', encoding='utf-8') as file:
            preload_data = json.load(file)
    except json.JSONDecodeError as e:
        print(f"预加载语音配置文件格式错误: {str(e)}")
        sys.exit(1)

    # Configured phrases first, then the spoken numbers 0..149.
    for item in preload_data:
        save_voice(item['content'], f"{save_file_path}/{item['name']}.wav")
    for i in range(150):
        save_voice(f'{i}', f"{save_file_path}/{i}.wav")
    print("=====build voice file success=====")
def save_voice(audio_data, file_path):
    """Synthesize ``audio_data`` and write it to ``file_path`` at twice the amplitude.

    Args:
        audio_data: text (plain or SSML) passed to the TTS executor.
        file_path: destination path of the resulting wav file.

    The TTS output is first written to a unique temporary wav file, read back,
    doubled in amplitude, clipped to the int16 range and written to ``file_path``
    as 16-bit samples.
    """
    import tempfile  # local import so the block does not depend on the file header

    # A unique temporary file per call: a fixed "temp.wav" in the working
    # directory would be overwritten by concurrent requests / the background build.
    fd, temp_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        tts(text=audio_data, output=temp_path, device='cpu', spk_id=0, am='speedyspeech_csmsc')
        sample_rate, data = wavfile.read(temp_path)
    finally:
        os.remove(temp_path)

    # Widen integer samples before multiplying: doubling int16 values in place
    # wraps around on overflow, so the subsequent clip would never take effect.
    if np.issubdtype(data.dtype, np.integer):
        data = data.astype(np.int64)
    boosted = np.clip(data * 2, -32768, 32767).astype(np.int16)
    wavfile.write(file_path, sample_rate, boosted)
# if __name__ == '__main__':
# args = sys.argv
# current_directory = f"{os.path.join(current_file_path, 'conf', 'surname.json')}"
# print('生成多音字的格式,注明拼音的文字和其它文字使用@间隔,文字和拼音用*间隔,示例如:覃*qin2@海洋')
# if len(args)!=3:
# print("参数不合法,请检查:需要2个参数 第1个参数为语音文本 第2个参数为语音文件保存路径")
# sys.exit(-1)
# tts=TTSExecutor()
# if '@' in args[1]:
# audio_data="<speak>"
# text=args[1].split('@')
# for txt in text:
# if '*' in txt:
# audio_data += f"<say-as pinyin='{txt.split('*')[1]}'>{txt.split('*')[0]}</say-as>"
# else:
# audio_data+=txt
# audio_data+='</speak>'
# else :
# if not os.path.isfile(current_directory):
# audio_data=args[1]
# else:
# try:
# with open(current_directory, 'r', encoding='utf-8') as file:
# preload_data = json.load(file)
# except json.JSONDecodeError as e:
# print(f"预加载姓氏读音配置文件格式错误: {str(e)}")
# sys.exit(1)
# audio_data=args[1]
# for item in preload_data:
# if args[1].startswith(item['suname']):
# audio_data ="<speak>"+ re.sub("^" + re.escape(item['suname']), item['speak'], args[1])+'</speak>'
# break
# output_file = f"{os.path.join(current_file_path, 'files',args[2])}.wav"
# print('即将生成:{} 的语音文件,保存位置为{}.....'.format(audio_data,output_file))
# tts(text=audio_data, output=f"temp.wav",device='cpu', spk_id=0, am='speedyspeech_csmsc')
# # 将增强音量的音频写入新文件
# print('生成成功,增强音量中')
# sample_rate,data=wavfile.read("temp.wav")
# new_data=data*2
# new_data=np.clip(new_data,-32768,32767).astype(np.int16)
# wavfile.write(output_file,sample_rate,new_data)
# print('语音文件已保存{}'.format(output_file))
if __name__ == '__main__':
    # The interactive debugger enabled by debug=True lets a client execute
    # arbitrary code, so it must not be on by default while the server listens
    # on every interface (0.0.0.0). Opt in for local development with FLASK_DEBUG=1.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true')
    app.run(host='0.0.0.0', port=5000, debug=debug_enabled)