近日工作記錄20211205-1215
這一周嘗試遍歷了StyleGAN2的14個特征層,并選取編輯向量為512維空間的標準正交基進行了嘗試,取得了一些進展,現將近期嘗試和下一步工作簡單梳理。

(一)近日工作總結(jié)
1、對愛因斯坦潛在向量進行編輯

(二)基本結(jié)果展示

(三)存在的問題
1、數據的問題:利用id_invert框架的時候只在部分數據上表現良好,需要實驗篩選至少一兩個合適的數據(名人照片等)
2、方法過于簡單,很多基于直觀的嘗試都需要進一步深化,提出某些方法
(四)下一步工作

梳理如下:
1、首要:輸入多張圖片,并提取512維中變化劇烈的關鍵維度(具體就是多找幾張圖片,分層輸入生成512個文件,查看在哪些維度上變化比較劇烈,最后從其中挑選50-100個就行)
2、提取之后觀察是否可泛化;需要設計一種數學方法來自動提取這些維度(機器學習可解釋性方面的知識融合一下),或者找一個情緒識別器,看是否能做到提取。
3、完成提取了某些維度之后,再與現有方法進行對比,隨機擾動和已有語義編輯比較容易生成數據,但PCA方法之后需要研究觀察。
4、結合對比中提到的三種方法,需要掌握這兩個指標的代碼,并將四種方法生成圖片的結果進行對比,需要先完成3。
5、如果id_invert結果實在不行的話,可以先用別的encoder提取.npy向量,再放到這個代碼里進行編輯。
(五)代碼修改記錄-防止忘記
1、這個是分層分維度編輯
def manipulate(latent_codes,
               boundary,
               start_distance=-5.0,
               end_distance=5.0,
               step=21,
               layerwise_manipulation=False,
               num_layers=1,
               manipulate_layers=None,
               is_code_layerwise=False,
               is_boundary_layerwise=False,
               layerwise_manipulation_strength=1.0,
               flag=1):
  """Sweeps a single latent coordinate of the codes, optionally per layer.

  For every input code, `step` edited copies are produced. In each copy the
  `flag`-th coordinate of the last axis is shifted by an offset taken from
  `np.linspace(start_distance, end_distance, step)`; only the layers selected
  through `manipulate_layers` are shifted, all other layers keep the input
  values.

  NOTE: `boundary` is validated, tiled and scaled by the layer-wise strength,
  but it is NOT used as the edit direction. The direction is always the
  standard basis vector selected by `flag`.

  Args:
    latent_codes: Array with shape [num, *code_shape], or
      [num, num_layers, *code_shape] if `is_code_layerwise` is True.
    boundary: Array with shape [1, *code_shape], or
      [1, num_layers, *code_shape] if `is_boundary_layerwise` is True.
    start_distance: Offset applied in the first edited copy.
    end_distance: Offset applied in the last edited copy.
    step: Number of edited copies per input code.
    layerwise_manipulation: If False, all layer-wise options are reset
      (single layer, unit strength) and the layer axis is dropped from the
      result.
    num_layers: Number of layers of the layer-wise codes. Must be positive.
    manipulate_layers: Layer selection forwarded to `parse_indices` (defined
      elsewhere in the project; presumably returns a list of layer indices).
      An empty result selects every layer.
    is_code_layerwise: Whether `latent_codes` already has a layer axis.
    is_boundary_layerwise: Whether `boundary` already has a layer axis.
    layerwise_manipulation_strength: A number, or a list / tuple / ndarray
      holding one value per layer.
    flag: Index of the coordinate to edit. Negative values wrap around as in
      regular NumPy indexing.

  Returns:
    Array with shape [num, step, num_layers, *code_shape] if
    `layerwise_manipulation` is True, otherwise [num, step, *code_shape].

  Raises:
    ValueError: If the boundary, the latent codes or the strength have a
      shape or type that does not match the expectations above.
  """
  if not (boundary.ndim >= 2 and boundary.shape[0] == 1):
    raise ValueError(f'Boundary should be with shape [1, *code_shape] or '
                     f'[1, num_layers, *code_shape], but '
                     f'{boundary.shape} is received!')

  if not layerwise_manipulation:
    assert not is_code_layerwise
    assert not is_boundary_layerwise
    num_layers = 1
    manipulate_layers = None
    layerwise_manipulation_strength = 1.0

  # Preprocessing for layer-wise manipulation.
  # Parse indices of manipulation layers; fall back to all layers.
  layer_indices = parse_indices(
      manipulate_layers, min_val=0, max_val=num_layers - 1)
  if not layer_indices:
    layer_indices = list(range(num_layers))

  # Make latent codes layer-wise if needed.
  assert num_layers > 0
  if not is_code_layerwise:
    x = latent_codes[:, np.newaxis]
    x = np.tile(x, [num_layers if axis == 1 else 1 for axis in range(x.ndim)])
  else:
    x = latent_codes
    if x.shape[1] != num_layers:
      raise ValueError(f'Latent codes should be with shape [num, num_layers, '
                       f'*code_shape], where `num_layers` equals to '
                       f'{num_layers}, but {x.shape} is received!')

  # Make boundary layer-wise if needed.
  if not is_boundary_layerwise:
    b = boundary
    b = np.tile(b, [num_layers if axis == 0 else 1 for axis in range(b.ndim)])
  else:
    b = boundary[0]
    if b.shape[0] != num_layers:
      raise ValueError(f'Boundary should be with shape [num_layers, '
                       f'*code_shape], where `num_layers` equals to '
                       f'{num_layers}, but {b.shape} is received!')

  # Get layer-wise manipulation strength.
  if isinstance(layerwise_manipulation_strength, (int, float)):
    s = [float(layerwise_manipulation_strength) for _ in range(num_layers)]
  elif isinstance(layerwise_manipulation_strength, (list, tuple)):
    s = layerwise_manipulation_strength
    if len(s) != num_layers:
      raise ValueError(f'Shape of layer-wise manipulation strength `{len(s)}` '
                       f'mismatches number of layers `{num_layers}`!')
  elif isinstance(layerwise_manipulation_strength, np.ndarray):
    s = layerwise_manipulation_strength
    if s.size != num_layers:
      raise ValueError(f'Shape of layer-wise manipulation strength `{s.size}` '
                       f'mismatches number of layers `{num_layers}`!')
  else:
    raise ValueError('Unsupported type of `layerwise_manipulation_strength`!')
  s = np.array(s).reshape(
      [num_layers if axis == 0 else 1 for axis in range(b.ndim)])
  # The scaled boundary only serves the shape check below; the edit itself
  # uses a basis vector instead of the boundary.
  b = b * s

  if x.shape[1:] != b.shape:
    raise ValueError(f'Latent code shape {x.shape} and boundary shape '
                     f'{b.shape} mismatch!')
  num = x.shape[0]
  code_shape = x.shape[2:]

  # Insert the step axis: [num, 1, num_layers, *code_shape].
  x = x[:, np.newaxis]
  # Trace which dimension is being edited (callers sweep over `flag`).
  print(flag)

  # Single-dimension edit direction: the `flag`-th standard basis vector of
  # the last axis, shaped to broadcast against `x`. The width is read from
  # the codes instead of being hard-coded to 512.
  # NOTE: the earlier experiment that edited the same dimension on layers 2
  # and 3 only (via a fixed (1, 1, 14, 512) mask) was dead code here;
  # restricting `layer_indices` to those layers yields the same result.
  code_dim = x.shape[-1]
  direction = np.zeros(code_dim)
  direction[flag] = 1
  direction = direction.reshape([1] * (x.ndim - 1) + [code_dim])

  offsets = np.linspace(start_distance, end_distance, step).reshape(
      [step if axis == 1 else 1 for axis in range(x.ndim)])
  results = np.tile(x, [step if axis == 1 else 1 for axis in range(x.ndim)])
  # Only the selected layers receive the shifted codes.
  is_manipulatable = np.zeros(results.shape, dtype=bool)
  is_manipulatable[:, :, layer_indices] = True
  results = np.where(is_manipulatable, x + offsets * direction, results)
  assert results.shape == (num, step, num_layers, *code_shape)

  return results if layerwise_manipulation else results[:, :, 0]
2、這個是非線性嘗試
?#分步非線性嘗試,類似三角形沿著直角邊走(線性就是沿著斜邊走)
?
?x = x[:, np.newaxis]
?b = b[np.newaxis, np.newaxis, :]
?print(flag)
?c = np.zeros(512)
?c[flag]=1
?c = c.reshape(1,1,1,512)
?#固定下一個(gè)維度的值
?temp = np.zeros(512)
?temp[flag+1]=start_distance
?temp = temp.reshape(1,1,1,512)
?
?l = np.linspace(start_distance, end_distance, step).reshape(
? ? ?[step if axis == 1 else 1 for axis in range(x.ndim)])
?results = np.tile(x, [step if axis == 1 else 1 for axis in range(x.ndim)])
?is_manipulatable = np.zeros(results.shape, dtype=bool)
?is_manipulatable[:, :, layer_indices] = True
?results = np.where(is_manipulatable, x + l * c+temp, results)
?assert results.shape == (num, step, num_layers, *code_shape)
?return results if layerwise_manipulation else results[:, :, 0]
#結果生成也要兩步
? ?codes = manipulate(latent_codes=latent_codes,
? ? ? ? ? ? ? ? ? ? ? boundary=boundary,
? ? ? ? ? ? ? ? ? ? ? start_distance=args.start_distance,
? ? ? ? ? ? ? ? ? ? ? end_distance=args.end_distance,
? ? ? ? ? ? ? ? ? ? ? step=step,
? ? ? ? ? ? ? ? ? ? ? layerwise_manipulation=True,
? ? ? ? ? ? ? ? ? ? ? num_layers=generator.num_layers,
? ? ? ? ? ? ? ? ? ? ? manipulate_layers=manipulate_layers,
? ? ? ? ? ? ? ? ? ? ? is_code_layerwise=True,
? ? ? ? ? ? ? ? ? ? ? is_boundary_layerwise=True,
? ? ? ? ? ? ? ? ? ? ? flag=flag-1)
? ?for img_idx in tqdm(range(num_images), leave=False):
? ? ?output_images = generator.easy_synthesize(
? ? ? ? ?codes[img_idx], latent_space_type='wp')['image']
? ? ?for s, output_image in enumerate(output_images):
? ? ? ?visualizer.set_cell(img_idx, s + 3, image=output_image)
? ? ? ?save_image(f'{output_dir}/_AI'+str(img_idx)+"linear"+str(s+1)+'.png', output_image)
? ?# Save results.
? ?visualizer.save(f'{output_dir}/{job_name}_{flag}.html')
? ?#以上次的終點(diǎn)作為新的起點(diǎn)
? ?next_ = codes[0][step-1]
? ?next_codes = manipulate(latent_codes=next_[np.newaxis,:],
? ? ? ? ? ? ? ? ? ? ? boundary=boundary,
? ? ? ? ? ? ? ? ? ? ? start_distance=0,
? ? ? ? ? ? ? ? ? ? ? end_distance=args.end_distance-args.start_distance,
? ? ? ? ? ? ? ? ? ? ? step=step,
? ? ? ? ? ? ? ? ? ? ? layerwise_manipulation=True,
? ? ? ? ? ? ? ? ? ? ? num_layers=generator.num_layers,
? ? ? ? ? ? ? ? ? ? ? manipulate_layers=manipulate_layers,
? ? ? ? ? ? ? ? ? ? ? is_code_layerwise=True,
? ? ? ? ? ? ? ? ? ? ? is_boundary_layerwise=True,
? ? ? ? ? ? ? ? ? ? ? flag=flag)
? ?for img_idx in tqdm(range(num_images), leave=False):
? ? ?output_images = generator.easy_synthesize(
? ? ? ? ?next_codes[img_idx], latent_space_type='wp')['image']
? ? ?for s, output_image in enumerate(output_images):
? ? ? ?save_image(f'{output_dir}/_AI'+str(img_idx)+"linear"+str(s+1+step)+'.png', output_image)
? ?
(六)一些基本命令
1、winscp傳輸實在太慢了,所以嘗試直接用命令行打包傳輸
# Pack the `results` directory into the archive data.tar
# (-c create, -v list files as they are added, -f archive file name).
tar -cvf data.tar results