Ai生成語音怎么調速(paddleSpeech)

依賴源碼finetuneTTS.py 的功能,搜索這個項目就能找到這個文件,把這些文件拷到本地使用,以下是核心的兩步
1.生成音素
2.生成語音
生成音素.py
"""Generate per-sentence phoneme/duration JSON files for later speech synthesis.

Step 1 of a two-step pipeline (1. generate phonemes, 2. generate speech).
Relies on helper functions from ``Text2Speech.util.finetuneTTS``.
"""
import json
import os

import paddle

from Text2Speech.util.finetuneTTS import (
    duration_phones_to_list_fix_speed,
    fs2_inference,
    get_idx2ph_dict,
    get_tts_phone_ids,
    list_to_durations,
    load_fs2_model,
)


def generate_yinsu(text, gen_exp_name, outPath, speed=1):
    """Write one phoneme/duration JSON file per line of the input text file.

    Args:
        text: Path of a UTF-8 text file; it is split on ``"\\n"`` and every
            resulting line is processed as one sentence.
        gen_exp_name: Value passed to ``load_fs2_model``, ``get_tts_phone_ids``
            and ``get_idx2ph_dict`` (the caller below passes the acoustic
            model directory).
        outPath: Directory in which the JSON files are written. The file for
            sentence ``i`` is ``outPath + "/phs_list.json" + str(i)``.
        speed: Forwarded unchanged to ``duration_phones_to_list_fix_speed``.
            NOTE(review): its exact meaning and valid range are defined by
            that helper -- confirm there.
    """
    with open(text, "r", encoding="utf-8") as src:
        sentences = src.read().split("\n")

    model = load_fs2_model(gen_exp_name)
    # Depends only on gen_exp_name, so it is looked up once instead of once
    # per sentence.
    idx2ph_dict = get_idx2ph_dict(gen_exp_name)

    # NOTE(review): blank lines (e.g. from a trailing newline) are passed to
    # the helpers like any other sentence, exactly as before.
    for num, sentence in enumerate(sentences):
        phone_ids = get_tts_phone_ids(sentence, gen_exp_name, exp_fun=True)
        duration = fs2_inference(
            model,
            text=phone_ids[0],
            spk_id=paddle.to_tensor(0),
            alpha=1.0,
            duration=None,
            return_duration=True,
        )
        phs_lists = duration_phones_to_list_fix_speed(
            duration, phone_ids, idx2ph_dict, speed
        )

        phs_lists_json = outPath + "/phs_list.json" + f'{num}'
        if os.path.exists(phs_lists_json):
            os.remove(phs_lists_json)
        with open(phs_lists_json, "w", encoding="utf8") as f:
            json.dump(phs_lists, f, indent=3)

    print("------------完成因素生成--------------")


def read_yinsu(outPath, index):
    """Load the phoneme JSON file for sentence ``index`` and convert it to durations.

    Args:
        outPath: Directory that ``generate_yinsu`` wrote its files to.
        index: Sentence index used as the file-name suffix.

    Returns:
        The result of ``list_to_durations`` on the loaded JSON data, or
        ``None`` when the file does not exist.
    """
    phs_lists_json = outPath + "/phs_list.json" + f'{index}'
    if not os.path.exists(phs_lists_json):
        return None

    with open(phs_lists_json, "r", encoding="utf8") as f:
        phs_lists = json.load(f)
    new_duration = list_to_durations(phs_lists)
    print(new_duration)
    return new_duration


if __name__ == '__main__':
    rootPath = "D:/Text2Speech/"
    # Acoustic model path
    am_inference_dir = rootPath + "model/" + "guanbo2"
    # Vocoder path
    voc_inference_dir = rootPath + "soundCoder/" + "pwgan_aishell3_static_1.1.0"
    # Output directory for synthesized audio
    wav_output_dir = rootPath + "output"
    # Compute device: cpu or gpu. AMD pytorch-dml still has many
    # incompatibilities, so just use cpu.
    device = "cpu"
    # Path of the text file to generate speech from
    txt_name = rootPath + "speechtxt.txt"
    # The generated phoneme data lets specific words/sentences be fine-tuned:
    # generate it first, use it in the later synthesis step, and adjust
    # whatever sounds wrong.
    # NOTE(review): a negative speed (-1.4) is passed here; confirm that
    # duration_phones_to_list_fix_speed accepts negative values.
    generate_yinsu(txt_name, am_inference_dir, wav_output_dir, -1.4)
標簽: