手把手教你使用RVC自帶的Edge-TTS,實現文本轉模型語音

tts.py:(可直接複製到記事本中)
"""Synthesize speech with edge-tts and save it to a local file.

Usage:
    python tts.py [TEXT] [VOICE] [RATE] [VOLUME]

RATE and VOLUME are signed percentage strings such as "+0%" or "-5%".
Every argument is optional and falls back to the defaults below.
"""
import asyncio
import sys

import edge_tts

TEXT = sys.argv[1] if len(sys.argv) > 1 else "Hello World!"
VOICE = sys.argv[2] if len(sys.argv) > 2 else "en-GB-SoniaNeural"
rate = sys.argv[3] if len(sys.argv) > 3 else "+0%"
volume = sys.argv[4] if len(sys.argv) > 4 else "+0%"
# The caller (tts_fn in the web UI) reads this exact file name from the
# working directory, so it must stay in sync with that code.
# NOTE(review): edge-tts presumably emits MP3-encoded data regardless of the
# ".wav" extension; the caller re-encodes the file afterwards -- confirm.
OUTPUT_FILE = "abc.wav"


async def _main() -> None:
    """Request the synthesis and write the resulting audio to OUTPUT_FILE."""
    # Echo the effective rate/volume so they show up in the caller's console.
    print(rate + " " + volume)
    # rate/volume are passed by keyword: recent edge-tts releases declare
    # them keyword-only, and the keyword form also works on older versions.
    communicate = edge_tts.Communicate(TEXT, VOICE, rate=rate, volume=volume)
    await communicate.save(OUTPUT_FILE)


if __name__ == "__main__":
    # asyncio.run creates the event loop, runs the coroutine and always
    # closes the loop, replacing the manual get_event_loop/close sequence.
    asyncio.run(_main())
#TTS功能需要的參數:(自己注意下排版對齊)
# Input widgets for the TTS feature.
# NOTE: when pasting into the web UI, indent these statements to match the
# enclosing Gradio layout block.

# Text that will be synthesized by edge-tts before voice conversion.
text_input = gr.Textbox(
    label="在此輸入需要轉譯的文字(建議打開自動f0預測)",
)
# edge-tts voice used for the intermediate (pre-conversion) speech.
tts_spk = gr.Dropdown(
    label="選擇原始音頻音色(來自微軟TTS)",
    choices=[
        "zh-CN-XiaoyiNeural",
        "zh-CN-YunxiNeural",
        "zh-CN-liaoning-XiaobeiNeural",
        "zh-CN-shaanxi-XiaoniNeural",
        "zh-HK-HiuMaanNeural",
        "zh-HK-WanLungNeural",
        "ja-JP-NanamiNeural",
        "ja-JP-KeitaNeural",
    ],
    value="zh-CN-XiaoyiNeural",
)
# Speaking-rate offset in percent (0 = unchanged); tts_fn turns it into "+N%".
tts_rate = gr.Number(label=i18n("TTS變速"), value=0)
# Volume offset in percent (0 = unchanged). The value is forwarded to the
# edge-tts *volume* argument, so the label says volume rather than pitch.
tts_volume = gr.Number(label=i18n("TTS音量"), value=0)
#but00按鈕創建
# Button that triggers text -> TTS -> voice conversion.
# NOTE: when pasting into the web UI, indent to match the enclosing Gradio
# layout block.
but00 = gr.Button(i18n("轉換TTS"), variant="primary")
#but00按鈕事件
# Click handler for the TTS button: call tts_fn with the TTS widgets followed
# by the regular single-file conversion settings, and route its result to the
# two conversion outputs. The input order must match tts_fn's parameter order.
# NOTE: when pasting into the web UI, indent to match the enclosing Gradio
# layout block.
but00.click(
    tts_fn,
    [
        text_input,
        tts_spk,
        tts_rate,
        tts_volume,
        sid0,
        input_audio0,
        vc_transform0,
        f0_file,
        f0method0,
        file_index1,
        file_index2,
        index_rate1,
        filter_radius0,
        resample_sr0,
        rms_mix_rate0,
        protect0,
    ],
    [vc_output1, vc_output2],
    api_name="vc_tts",
)
tts_fn():
def tts_fn(
    _text,
    _speaker,
    tts_rate,
    tts_volume,
    sid,
    input_audio_path,
    f0_up_key,
    f0_file,
    f0_method,
    file_index,
    file_index2,
    index_rate,
    filter_radius,
    resample_sr,
    rms_mix_rate,
    protect,
):
    """Synthesize *_text* with edge-tts, then convert it with the selected model.

    Steps: load the model named *sid*, run ``tts.py`` in a subprocess to
    produce ``abc.wav`` in the working directory, re-encode that file as
    44.1 kHz 16-bit PCM, and pass it to ``vc_single``.

    Args:
        _text: Text to synthesize (forwarded to ``tts.py``).
        _speaker: edge-tts voice name (forwarded to ``tts.py``).
        tts_rate: Speaking-rate offset in percent; converted to ``"+N%"``.
        tts_volume: Volume offset in percent; converted to ``"+N%"``.
        sid: Model identifier handed to ``get_vc_self``.
        input_audio_path: Unused; the generated TTS file is the input.
        f0_up_key, f0_file, f0_method, file_index, file_index2, index_rate,
        filter_radius, resample_sr, rms_mix_rate, protect: Forwarded
            unchanged to ``vc_single``.

    Returns:
        Whatever ``vc_single`` returns; the UI binds it to two outputs.

    Raises:
        subprocess.CalledProcessError: If the ``tts.py`` subprocess fails.
    """
    import os  # local import keeps the pasted snippet self-contained

    # Switch the global voice model to the requested one.
    get_vc_self(sid)

    # Guard against an empty Number field arriving as None (assumed Gradio
    # behaviour -- verify); treat it as "no change".
    rate_pct = int(tts_rate or 0)
    volume_pct = int(tts_volume or 0)
    # edge-tts expects an explicitly signed percentage, e.g. "+0%" or "-5%".
    tts_r = "%+d%%" % rate_pct
    tts_v = "%+d%%" % volume_pct

    # Run the TTS helper. check=True stops here when synthesis fails instead
    # of silently converting a stale abc.wav from a previous run.
    subprocess.run(
        [r"runtime\python.exe", "tts.py", _text, _speaker, tts_r, tts_v],
        check=True,
    )

    # Re-encode the TTS output as 44.1 kHz 16-bit PCM. Loading directly at
    # the target rate avoids an intermediate resample to librosa's default.
    tts_wav = "abc.wav"
    sr_44100 = 44100
    y, _ = librosa.load(tts_wav, sr=sr_44100)
    sf.write(tts_wav, y, sr_44100, subtype="PCM_16")

    # tts.py writes relative to the working directory, so resolve the path
    # from there rather than hard-coding one machine's install location.
    input_audio = os.path.abspath(tts_wav)

    # Convert the synthesized speech; speaker id 0 is always used.
    info = vc_single(
        0,
        input_audio,
        f0_up_key,
        f0_file,
        f0_method,
        file_index,
        file_index2,
        index_rate,
        filter_radius,
        resample_sr,
        rms_mix_rate,
        protect,
    )
    # The result must be returned so the UI outputs get populated.
    return info
get_vc_self():
def _build_synthesizer(cpt, version, if_f0):
    """Instantiate the synthesizer matching the checkpoint's version and f0 flag.

    Returns None when *version* is neither "v1" nor "v2".
    """
    if version == "v1":
        if if_f0 == 1:
            return SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)
        return SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
    if version == "v2":
        if if_f0 == 1:
            return SynthesizerTrnMs768NSFsid(*cpt["config"], is_half=config.is_half)
        return SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
    return None


# Only one voice model can be active per tab: the model lives in globals.
def get_vc_self(sid):
    """Load the voice model *sid* into the module globals, or unload it.

    With an empty *sid* (``""`` or ``[]``) the currently loaded model and the
    hubert model are released and a hidden-component update dict is returned.
    Otherwise the checkpoint ``weight_root/sid`` is loaded and the globals
    ``cpt``, ``tgt_sr``, ``version``, ``net_g``, ``vc`` and ``n_spk`` are
    replaced; nothing is returned in that case (callers such as ``tts_fn``
    use this function only for its side effects).

    Raises:
        ValueError: If the checkpoint declares a version other than v1/v2.
    """
    global n_spk, tgt_sr, net_g, vc, cpt, version, hubert_model

    if sid == "" or sid == []:
        # This may be polled repeatedly; only clean up when we are actually
        # switching from "model loaded" to "no model".
        if hubert_model is not None:
            print("clean_empty_cache")
            del net_g, n_spk, vc, hubert_model, tgt_sr
            hubert_model = net_g = n_spk = vc = tgt_sr = None
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            # Rebuilding the network once and dropping it again is a
            # workaround: without it the memory is not fully released.
            if cpt is not None:
                if_f0 = cpt.get("f0", 1)
                version = cpt.get("version", "v1")
                net_g = _build_synthesizer(cpt, version, if_f0)
            del net_g, cpt
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            # Leave both globals defined (as None) so later accesses and a
            # repeated unload do not hit a NameError.
            net_g = cpt = None
        return {"visible": False, "__type__": "update"}

    person = "%s/%s" % (weight_root, sid)
    print("loading %s" % person)
    # NOTE(security): torch.load unpickles the file; only load checkpoints
    # from a trusted weights directory.
    cpt = torch.load(person, map_location="cpu")
    tgt_sr = cpt["config"][-1]
    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
    if_f0 = cpt.get("f0", 1)
    version = cpt.get("version", "v1")

    net_g = _build_synthesizer(cpt, version, if_f0)
    if net_g is None:
        # Previously an unknown version fell through and reused whatever
        # net_g happened to be loaded before.
        raise ValueError("unsupported model version: %r" % (version,))

    # Drop enc_q before loading the weights (strict=False tolerates the
    # resulting key mismatch).
    del net_g.enc_q
    print(net_g.load_state_dict(cpt["weight"], strict=False))
    net_g.eval().to(config.device)
    net_g = net_g.half() if config.is_half else net_g.float()
    vc = VC(tgt_sr, config)
    n_spk = cpt["config"][-3]