近日工作記錄20211205-1215

2022-06-26 22:57 作者:Nofear-wave 0人讀過 | 我要投稿

這一周嘗試遍歷了styleGAN2的14個特征層，并選取編輯向量為512維空間的標準正交基進行了嘗試，取得了一些進展，現將近期嘗試和下一步工作簡單梳理。

（一）近日工作總結

1、對愛因斯坦潛在向量進行編輯

（二）基本結果展示

（三）存在的問題

1、數據的問題：利用id_invert框架的時候只在部分數據上表現良好，需要實驗篩選至少一兩個合適的數據(名人照片等)

2、方法過于簡(jiǎn)單，很多基于直觀的嘗試都需要進(jìn)一步深化，提出某些方法

（四）下一步工作

梳理如下：

1、首要：輸入多張圖片，并提取512維中變化劇烈的關鍵維度（具體就是多找幾張圖片，分層輸入生成512個文件，查看在哪些維度上變化比較劇烈，最后從其中挑選50-100個就行）

2、提取之后觀察是否可泛化；需要設計一種數學方法來自動提取這些維度(機器學習可解釋性方面的知識融合一下)，或者找一個情緒識別器，看是否能做到提取。

3、完成提取了某些維度之后，再與現有方法進行對比，隨機擾動和已有語義編輯比較容易生成數據，但PCA方法之后需要研究觀察。

4、結合對比中提到的三種方法，需要掌握這兩個指標的代碼，并將四種方法生成圖片的結果進行對比，需要先完成3。

5、如果id_invert結果實在不行的話，可以先用別的encoder提取.npy向量，再放到這個代碼里進行編輯。

（五）代碼修改記錄-防止忘記

1、這個是分層分維度編輯

def manipulate(latent_codes,
               boundary,
               start_distance=-5.0,
               end_distance=5.0,
               step=21,
               layerwise_manipulation=False,
               num_layers=1,
               manipulate_layers=None,
               is_code_layerwise=False,
               is_boundary_layerwise=False,
               layerwise_manipulation_strength=1.0,
               flag=1):
  """Edits latent codes along a single coordinate axis, layer by layer.

  For every input code, `step` evenly spaced offsets between `start_distance`
  and `end_distance` are added to coordinate `flag` of the last axis, but only
  on the layers selected by `manipulate_layers`; all other layers are copied
  through unchanged.

  NOTE: `boundary` is still validated (and scaled by the layer-wise strength)
  so that shape errors are reported as before, but the edit direction itself
  is the one-hot basis vector selected by `flag`, not the boundary.

  Args:
    latent_codes: Array of shape [num, *code_shape], or
      [num, num_layers, *code_shape] when `is_code_layerwise` is True.
    boundary: Array of shape [1, *code_shape], or
      [1, num_layers, *code_shape] when `is_boundary_layerwise` is True.
    start_distance: Offset applied at the first step.
    end_distance: Offset applied at the last step.
    step: Number of offsets generated per input code.
    layerwise_manipulation: Whether to edit layer-wise. When False, the
      layer-related arguments are reset to their single-layer defaults.
    num_layers: Number of layers of the layer-wise code.
    manipulate_layers: Layer indices to edit, parsed by `parse_indices`.
      An empty result selects every layer.
    is_code_layerwise: Whether `latent_codes` already has a layer axis.
    is_boundary_layerwise: Whether `boundary` already has a layer axis.
    layerwise_manipulation_strength: Scalar, list/tuple or `np.ndarray` with
      one strength per layer.
    flag: Index, along the last axis of the code, of the coordinate to edit.
      It is used directly as a NumPy index, so negative values count from
      the end and out-of-range values raise `IndexError`.

  Returns:
    Array of shape [num, step, num_layers, *code_shape] when
    `layerwise_manipulation` is True, otherwise the slice for layer 0 with
    shape [num, step, *code_shape].

  Raises:
    ValueError: If the shapes of the codes, the boundary or the layer-wise
      strength are inconsistent, or the strength has an unsupported type.
  """
  if not (boundary.ndim >= 2 and boundary.shape[0] == 1):
    raise ValueError(f'Boundary should be with shape [1, *code_shape] or '
                     f'[1, num_layers, *code_shape], but '
                     f'{boundary.shape} is received!')

  if not layerwise_manipulation:
    assert not is_code_layerwise
    assert not is_boundary_layerwise
    num_layers = 1
    manipulate_layers = None
    layerwise_manipulation_strength = 1.0

  # Preprocessing for layer-wise manipulation.
  # Parse indices of manipulation layers.
  layer_indices = parse_indices(
      manipulate_layers, min_val=0, max_val=num_layers - 1)
  if not layer_indices:
    layer_indices = list(range(num_layers))

  # Make latent codes layer-wise if needed.
  assert num_layers > 0
  if not is_code_layerwise:
    x = latent_codes[:, np.newaxis]
    x = np.tile(x, [num_layers if axis == 1 else 1 for axis in range(x.ndim)])
  else:
    x = latent_codes
    if x.shape[1] != num_layers:
      raise ValueError(f'Latent codes should be with shape [num, num_layers, '
                       f'*code_shape], where `num_layers` equals to '
                       f'{num_layers}, but {x.shape} is received!')

  # Make boundary layer-wise if needed.
  if not is_boundary_layerwise:
    b = boundary
    b = np.tile(b, [num_layers if axis == 0 else 1 for axis in range(b.ndim)])
  else:
    b = boundary[0]
    if b.shape[0] != num_layers:
      raise ValueError(f'Boundary should be with shape [num_layers, '
                       f'*code_shape], where `num_layers` equals to '
                       f'{num_layers}, but {b.shape} is received!')

  # Get layer-wise manipulation strength.
  if isinstance(layerwise_manipulation_strength, (int, float)):
    s = [float(layerwise_manipulation_strength) for _ in range(num_layers)]
  elif isinstance(layerwise_manipulation_strength, (list, tuple)):
    s = layerwise_manipulation_strength
    if len(s) != num_layers:
      raise ValueError(f'Shape of layer-wise manipulation strength `{len(s)}` '
                       f'mismatches number of layers `{num_layers}`!')
  elif isinstance(layerwise_manipulation_strength, np.ndarray):
    s = layerwise_manipulation_strength
    if s.size != num_layers:
      raise ValueError(f'Shape of layer-wise manipulation strength `{s.size}` '
                       f'mismatches number of layers `{num_layers}`!')
  else:
    raise ValueError('Unsupported type of `layerwise_manipulation_strength`!')
  s = np.array(s).reshape(
      [num_layers if axis == 0 else 1 for axis in range(b.ndim)])
  b = b * s

  if x.shape[1:] != b.shape:
    raise ValueError(f'Latent code shape {x.shape} and boundary shape '
                     f'{b.shape} mismatch!')
  num = x.shape[0]
  code_shape = x.shape[2:]
  # Width of the edited axis; derived from the codes instead of assuming 512.
  latent_dim = x.shape[-1]

  # Insert the step axis: [num, 1, num_layers, *code_shape].
  x = x[:, np.newaxis]
  print(flag)  # Progress trace: shows which coordinate is being edited.

  # One-hot direction that selects coordinate `flag`; it broadcasts against
  # the last axis of `x`. To edit several coordinates at once, set more
  # entries to 1. To restrict a coordinate to specific layers, build a
  # [1, 1, num_layers, latent_dim] mask instead and set only those layers.
  direction = np.zeros(latent_dim)
  direction[flag] = 1

  # Offsets laid out along the step axis.
  offsets = np.linspace(start_distance, end_distance, step).reshape(
      [step if axis == 1 else 1 for axis in range(x.ndim)])
  results = np.tile(x, [step if axis == 1 else 1 for axis in range(x.ndim)])
  is_manipulatable = np.zeros(results.shape, dtype=bool)
  is_manipulatable[:, :, layer_indices] = True
  results = np.where(is_manipulatable, x + offsets * direction, results)
  assert results.shape == (num, step, num_layers, *code_shape)

  return results if layerwise_manipulation else results[:, :, 0]

2、這個是非線性嘗試

  # Step-wise non-linear attempt: like walking along the two legs of a right
  # triangle (a linear edit would walk along the hypotenuse).
  # NOTE(review): this looks like a replacement for the tail of `manipulate`;
  # it relies on `x`, `b`, `flag`, `start_distance`, `end_distance`, `step`,
  # `layer_indices`, `num`, `num_layers`, `code_shape` and
  # `layerwise_manipulation` from the enclosing function - confirm.

  x = x[:, np.newaxis]
  b = b[np.newaxis, np.newaxis, :]
  print(flag)
  # One-hot direction selecting coordinate `flag` of a 512-wide code.
  c = np.zeros(512)
  c[flag]=1
  c = c.reshape(1,1,1,512)

  # Hold the next coordinate (flag + 1) at a constant offset of
  # `start_distance` for every step.
  # NOTE(review): `flag + 1` is out of range when `flag` is the last index
  # (511), and `flag == -1` pins coordinate 0 - confirm the intended range.
  temp = np.zeros(512)
  temp[flag+1]=start_distance
  temp = temp.reshape(1,1,1,512)

  # Offsets laid out along the step axis (axis 1).
  l = np.linspace(start_distance, end_distance, step).reshape(
      [step if axis == 1 else 1 for axis in range(x.ndim)])
  results = np.tile(x, [step if axis == 1 else 1 for axis in range(x.ndim)])
  # Only the selected layers receive the edit; the others keep the tiled copy.
  is_manipulatable = np.zeros(results.shape, dtype=bool)
  is_manipulatable[:, :, layer_indices] = True
  results = np.where(is_manipulatable, x + l * c+temp, results)
  assert results.shape == (num, step, num_layers, *code_shape)

  return results if layerwise_manipulation else results[:, :, 0]

 #結果生成也要兩步
 ? ?codes = manipulate(latent_codes=latent_codes,
 ? ? ? ? ? ? ? ? ? ? ? boundary=boundary,
 ? ? ? ? ? ? ? ? ? ? ? start_distance=args.start_distance,
 ? ? ? ? ? ? ? ? ? ? ? end_distance=args.end_distance,
 ? ? ? ? ? ? ? ? ? ? ? step=step,
 ? ? ? ? ? ? ? ? ? ? ? layerwise_manipulation=True,
 ? ? ? ? ? ? ? ? ? ? ? num_layers=generator.num_layers,
 ? ? ? ? ? ? ? ? ? ? ? manipulate_layers=manipulate_layers,
 ? ? ? ? ? ? ? ? ? ? ? is_code_layerwise=True,
 ? ? ? ? ? ? ? ? ? ? ? is_boundary_layerwise=True,
 ? ? ? ? ? ? ? ? ? ? ? flag=flag-1)

 ? ?for img_idx in tqdm(range(num_images), leave=False):
 ? ? ?output_images = generator.easy_synthesize(
 ? ? ? ? ?codes[img_idx], latent_space_type='wp')['image']
 ? ? ?for s, output_image in enumerate(output_images):
 ? ? ? ?visualizer.set_cell(img_idx, s + 3, image=output_image)
 ? ? ? ?save_image(f'{output_dir}/_AI'+str(img_idx)+"linear"+str(s+1)+'.png', output_image)
 ? ?# Save results.
 ? ?visualizer.save(f'{output_dir}/{job_name}_{flag}.html')

 ? ?#以上次的終點(diǎn)作為新的起點(diǎn)
 ? ?next_ = codes[0][step-1]
 ? ?next_codes = manipulate(latent_codes=next_[np.newaxis,:],
 ? ? ? ? ? ? ? ? ? ? ? boundary=boundary,
 ? ? ? ? ? ? ? ? ? ? ? start_distance=0,
 ? ? ? ? ? ? ? ? ? ? ? end_distance=args.end_distance-args.start_distance,
 ? ? ? ? ? ? ? ? ? ? ? step=step,
 ? ? ? ? ? ? ? ? ? ? ? layerwise_manipulation=True,
 ? ? ? ? ? ? ? ? ? ? ? num_layers=generator.num_layers,
 ? ? ? ? ? ? ? ? ? ? ? manipulate_layers=manipulate_layers,
 ? ? ? ? ? ? ? ? ? ? ? is_code_layerwise=True,
 ? ? ? ? ? ? ? ? ? ? ? is_boundary_layerwise=True,
 ? ? ? ? ? ? ? ? ? ? ? flag=flag)
 ? ?for img_idx in tqdm(range(num_images), leave=False):
 ? ? ?output_images = generator.easy_synthesize(
 ? ? ? ? ?next_codes[img_idx], latent_space_type='wp')['image']
 ? ? ?for s, output_image in enumerate(output_images):

 ? ? ? ?save_image(f'{output_dir}/_AI'+str(img_idx)+"linear"+str(s+1+step)+'.png', output_image)
 ? ?

（六）一些基本命令

1、winscp傳輸實在太慢了，所以嘗試直接用命令行打包傳輸

#將結果打包為.tar文件
tar -cvf data.tar results

標籤：