Tacotron2+HifiGAN Paimon 600 Voice Synthesis Model Download

The model was trained on Google Colab. I have no money for Colab Pro, so it took a long cycle of reconnecting, training, reconnecting, training;
The training target was set at 600, and training is now fully complete.
Model size: 322 MB (338,426,303 bytes)
To synthesize audio, the input is pinyin plus tone numbers (see the short pypinyin sketch below).
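The inference code further down does this conversion automatically with pypinyin (Style.TONE3 appends the tone number to each syllable). A minimal sketch of that conversion, assuming pypinyin is installed; the example string is only an illustration:

from pypinyin import lazy_pinyin, Style

text = "派蒙"  # any Chinese string
print(" ".join(lazy_pinyin(text, style=Style.TONE3)))  # prints something like: pai4 meng2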
Test audio: https://wwb.lanzoul.com/ia7gs0bcr6da
Because the training data is uneven, results vary from sentence to sentence, but to my ear it is already quite close, even if not as good as VITS;


Because the model is larger than 100 MB, it cannot be uploaded to Lanzou for sharing;
Google Drive share link: https://drive.google.com/file/d/1I9kj7187xFyv9xapvmR-oBeILKX0gx9u/view?usp=sharing
The file has also been uploaded to the group (Group 1); if you cannot access Google Drive, you can join the group and download it there.
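If you run this on Colab, one possible way to pull the checkpoint straight into the runtime (instead of copying it into your own Drive) is gdown with the file ID from the share link above; the output path here is only an example, and Tacotron2_Model in the code below has to point at wherever you save it:

!pip install -q gdown
# File ID taken from the Google Drive share link above; /content/Paimon_test is an example path.
!gdown --id 1I9kj7187xFyv9xapvmR-oBeILKX0gx9u -O /content/Paimon_test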
Model inference code:
#@markdown Config:
#@markdown Restart the code to apply any changes.
#Add new characters here.
#Universal HiFi-GAN (has some robotic noise): 1qpgI41wNXFcH-iKq1Y42JlBC9j0je8PW
Tacotron2_Model = '/content/drive/MyDrive/colab/outdir/Paimon_test'#@param {type:"string"}
TACOTRON2_ID = Tacotron2_Model
HIFIGAN_ID = "1qpgI41wNXFcH-iKq1Y42JlBC9j0je8PW"
!pip install -q pypinyin  # make sure pypinyin is available
from pypinyin import lazy_pinyin, Style
# Check if already initialized
try:
    initilized
except NameError:
    print("Setting up, please wait.\n")
    !pip install tqdm -q
    from tqdm.notebook import tqdm
    with tqdm(total=5, leave=False) as pbar:
        %tensorflow_version 2.x
        import os
        from os.path import exists, join, basename, splitext
        !pip install gdown
        git_repo_url = 'https://github.com/NVIDIA/tacotron2.git'
        project_name = splitext(basename(git_repo_url))[0]
        if not exists(project_name):
            # clone and install
            !git clone -q --recursive {git_repo_url}
            !git clone -q --recursive https://github.com/SortAnon/hifi-gan
            !pip install -q librosa unidecode
        pbar.update(1) # downloaded TT2 and HiFi-GAN
        import sys
        sys.path.append('hifi-gan')
        sys.path.append(project_name)
        import time
        import matplotlib
        import matplotlib.pylab as plt
        import gdown
        d = 'https://drive.google.com/uc?id='  # Google Drive direct-download URL prefix for gdown
        %matplotlib inline
        import IPython.display as ipd
        import numpy as np
        import torch
        import json
        from hparams import create_hparams
        from model import Tacotron2
        from layers import TacotronSTFT
        from audio_processing import griffin_lim
        from text import text_to_sequence
        from env import AttrDict
        from meldataset import MAX_WAV_VALUE
        from models import Generator
        pbar.update(1) # initialized dependencies
        graph_width = 900
        graph_height = 360
        def plot_data(data, figsize=(int(graph_width/100), int(graph_height/100))):
            %matplotlib inline
            fig, axes = plt.subplots(1, len(data), figsize=figsize)
            for i in range(len(data)):
                axes[i].imshow(data[i], aspect='auto', origin='lower',
                               interpolation='none', cmap='inferno')
            fig.canvas.draw()
            plt.show()
        # Set up pronunciation dictionary
        !gdown --id '1E12g_sREdcH5vuZb44EZYX8JjGWQ9rRp'
        thisdict = {}
        for line in reversed((open('merged.dict.txt', "r").read()).splitlines()):
            thisdict[(line.split(" ", 1))[0]] = (line.split(" ", 1))[1].strip()
        pbar.update(1) # downloaded and set up pronunciation dictionary
        def ARPA(text, punctuation=r"!?,.;", EOS_Token=True):
            out = ''
            for word_ in text.split(" "):
                word = word_; end_chars = ''
                while any(elem in word for elem in punctuation) and len(word) > 1:
                    if word[-1] in punctuation: end_chars = word[-1] + end_chars; word = word[:-1]
                    else: break
                try:
                    word_arpa = thisdict[word.upper()]
                    word = "{" + str(word_arpa) + "}"
                except KeyError: pass
                out = (out + " " + word + end_chars).strip()
            if EOS_Token and out[-1] != ";": out += ";"
            return out
        def get_hifigan(MODEL_ID):
            # Download HiFi-GAN
            hifigan_pretrained_model = 'hifimodel'
            gdown.download(d+MODEL_ID, hifigan_pretrained_model, quiet=False)
            if not exists(hifigan_pretrained_model):
                raise Exception("HiFi-GAN model failed to download!")
            # Load HiFi-GAN
            conf = os.path.join("hifi-gan", "config_v1.json")
            with open(conf) as f:
                json_config = json.loads(f.read())
            h = AttrDict(json_config)
            torch.manual_seed(h.seed)
            hifigan = Generator(h).to(torch.device("cuda"))
            state_dict_g = torch.load(hifigan_pretrained_model, map_location=torch.device("cuda"))
            hifigan.load_state_dict(state_dict_g["generator"])
            hifigan.eval()
            hifigan.remove_weight_norm()
            return hifigan, h
        hifigan, h = get_hifigan(HIFIGAN_ID)
        pbar.update(1) # downloaded and set up HiFi-GAN
        def has_MMI(STATE_DICT):
            return any(True for x in STATE_DICT.keys() if "mi." in x)
        def get_Tactron2(MODEL_ID):
            # Load the Tacotron2 checkpoint from the path configured above
            tacotron2_pretrained_model = TACOTRON2_ID
            if not exists(tacotron2_pretrained_model):
                raise Exception("Tacotron2 model failed to download!")
            # Load Tacotron2 and config
            hparams = create_hparams()
            hparams.sampling_rate = 22050
            hparams.max_decoder_steps = 3000 # Max duration
            hparams.gate_threshold = 0.25 # Model must be 25% sure the clip is over before ending generation
            model = Tacotron2(hparams)
            state_dict = torch.load(tacotron2_pretrained_model)['state_dict']
            if has_MMI(state_dict):
                raise Exception("ERROR: This notebook does not currently support MMI models.")
            model.load_state_dict(state_dict)
            _ = model.cuda().eval().half()
            return model, hparams
        model, hparams = get_Tactron2(TACOTRON2_ID)
        previous_tt2_id = TACOTRON2_ID
        pbar.update(1) # downloaded and set up Tacotron2
        # Extra Info
        def end_to_end_infer(text, pronounciation_dictionary, show_graphs):
            for i in [x for x in text.split("\n") if len(x)]:
                if not pronounciation_dictionary:
                    if i[-1] != ";": i = i + ";"
                else: i = ARPA(i)
                with torch.no_grad(): # save VRAM by not including gradients
                    sequence = np.array(text_to_sequence(i, ['english_cleaners']))[None, :]
                    sequence = torch.autograd.Variable(torch.from_numpy(sequence)).cuda().long()
                    mel_outputs, mel_outputs_postnet, _, alignments = model.inference(sequence)
                    if show_graphs:
                        plot_data((mel_outputs_postnet.float().data.cpu().numpy()[0],
                                   alignments.float().data.cpu().numpy()[0].T))
                    y_g_hat = hifigan(mel_outputs_postnet.float())
                    audio = y_g_hat.squeeze()
                    audio = audio * MAX_WAV_VALUE
                    print("")
                    ipd.display(ipd.Audio(audio.cpu().numpy().astype("int16"), rate=hparams.sampling_rate))
    from IPython.display import clear_output
    clear_output()
    initilized = "Ready"
if previous_tt2_id != TACOTRON2_ID:
    print("Updating Models")
    model, hparams = get_Tactron2(TACOTRON2_ID)
    hifigan, h = get_hifigan(HIFIGAN_ID)
    previous_tt2_id = TACOTRON2_ID
pronounciation_dictionary = False #@param {type:"boolean"}
# disables automatic ARPAbet conversion, useful for inputting your own ARPAbet pronunciations or just for testing
show_graphs = True #@param {type:"boolean"}
max_duration = 25 #this does nothing
model.decoder.max_decoder_steps = 1000 #@param {type:"integer"}
stop_threshold = 0.3 #@param {type:"number"}
model.decoder.gate_threshold = stop_threshold
#@markdown ---
print(f"Current Config:\npronounciation_dictionary: {pronounciation_dictionary}\nshow_graphs: {show_graphs}\nmax_duration (in seconds): {max_duration}\nstop_threshold: {stop_threshold}\n\n")
time.sleep(1)
print("Enter/Paste?your?text.輸入拼音+數(shù)字表示聲調(diào),支持直接中文輸入")
contents?=?[]
while?True:
????try:
????????print("-"*50)
????????line?=?input()
????????if?line?!=?"":
??????????line?=?"?".join(lazy_pinyin(line,?style=Style.TONE3))
????????print(line)
????????end_to_end_infer(line,?pronounciation_dictionary,?show_graphs)
????except?EOFError:
????????break
????except?KeyboardInterrupt:
????????print("Stopping...")
????????break