WGCNA學習：WGCNA分析實戰

2020-11-10 21:02 作者:上天的小釗 0人讀過 | 我要投稿

1.WGCNA安裝

# Install WGCNA from Bioconductor, skipping any package that is already
# available, then attach it. (Console prompt characters removed so the lines
# can be pasted or sourced directly.)
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}

if (!requireNamespace("WGCNA", quietly = TRUE)) {
  BiocManager::install("WGCNA")
}

library(WGCNA)

2. 數據準備與讀入

2.1 數據準備

需要兩個數據：

表達矩陣（All_fpkm.list）

表型文件（pheno.txt）。需要注意表型文件分為兩類：連續變量型與分類變量型。

2.2 數據讀入

# Attach the packages used in this walkthrough.
library(WGCNA)
library(reshape2)
library(stringr)

# Relative paths used later (e.g. "img/...") resolve against this directory.
setwd('D:/data/wgcna/Categorical')

# Convert strings to factors when data are read (categorical traits).
# Change to FALSE when the trait file holds continuous variables.
# NOTE(review): recent R versions may no longer consult this global option in
# read.table()/data.frame() — confirm, or pass stringsAsFactors explicitly to
# the call that reads the trait file.
options(stringsAsFactors = TRUE)

# Enable multi-threading inside WGCNA.
enableWGCNAThreads()

## ==================== step 1: data input ====================

# NOTE(review): `fpkm` is not created anywhere in this section; it is assumed
# to be the expression matrix (genes in rows, samples in columns) loaded
# beforehand, e.g. from All_fpkm.list — confirm.
RNAseq_voom <- fpkm

# Rank genes by median absolute deviation and keep the most variable ones
# (at most 5000). head() is used instead of [1:5000] so that inputs with
# fewer than 5000 genes do not produce NA row indices.
gene_mad <- apply(RNAseq_voom, 1, mad)
top_genes <- head(order(gene_mad, decreasing = TRUE), 5000)

# WGCNA clusters genes, so genes must be in columns and samples in rows:
# transpose the selected sub-matrix.
WGCNA_matrix <- t(RNAseq_voom[top_genes, , drop = FALSE])
datExpr <- WGCNA_matrix # top-MAD genes, samples x genes

# Record the number of genes and samples.
nGenes <- ncol(datExpr)
nSamples <- nrow(datExpr)

# Hierarchical clustering of the samples (rows of datExpr).
datExpr_tree <- hclust(dist(datExpr), method = "average")

# Make sure the output directory exists before opening the PNG device.
dir.create("img", showWarnings = FALSE)
png("img/step1-sample-cluster.png", width = 800, height = 600)

# Margins are set after png() so they apply to the PNG device.
par(mar = c(0, 5, 2, 0))

# The original call passed cex.lab twice (2 and 1), which R rejects; the
# first value is kept.
plot(datExpr_tree,
     main = "Sample clustering", sub = "", xlab = "",
     cex.lab = 2, cex.axis = 1, cex.main = 1)

dev.off()

3. β值估計

## ==================== step 2: choose the soft-thresholding power (beta) ====

# Quick look at the top-left corner of the expression matrix.
datExpr[1:4, 1:4]

# Candidate powers: 1..10, then 12..20 in steps of 2.
powers <- c(1:10, seq(from = 12, to = 20, by = 2))

# Evaluate the scale-free topology fit for every candidate power; the
# suggested power is returned in sft$powerEstimate.
sft <- pickSoftThreshold(datExpr,
                         RsquaredCut = 0.9,
                         powerVector = powers,
                         verbose = 5)

# Make sure the output directory exists before opening the PNG device.
dir.create("img", showWarnings = FALSE)
png("img/step2-beta-value.png", width = 800, height = 600)

# Two panels side by side.
par(mfrow = c(1, 2))
cex1 <- 0.9

# Panel 1: scale-free topology fit index as a function of the
# soft-thresholding power. type = "n" draws empty axes; the power values are
# then written at each point by text().
plot(sft$fitIndices[, 1],
     -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2],
     xlab = "Soft Threshold (power)",
     ylab = "Scale Free Topology Model Fit,signed R^2",
     type = "n",
     main = "Scale independence")
text(sft$fitIndices[, 1],
     -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2],
     labels = powers, cex = cex1, col = "red")

# Horizontal line at the R^2 cut-off of 0.90.
abline(h = 0.90, col = "red")

# Panel 2: mean connectivity as a function of the soft-thresholding power.
plot(sft$fitIndices[, 1], sft$fitIndices[, 5],
     xlab = "Soft Threshold (power)",
     ylab = "Mean Connectivity",
     type = "n",
     main = "Mean connectivity")
text(sft$fitIndices[, 1], sft$fitIndices[, 5],
     labels = powers, cex = cex1, col = "red")

dev.off()

可以確定最佳β值為6

4. 一步法構建共表達矩陣

最核心的一步，同時也是最耗費計算資源的一步

## ==================== step 3: automatic WGCNA network construction ========

# One-step network construction and module detection.

# Fail early with a clear message when no candidate power reached the R^2
# cut-off, instead of passing NA on to blockwiseModules().
if (is.na(sft$powerEstimate)) {
  stop("pickSoftThreshold() did not find a power reaching the R^2 cut-off; ",
       "choose `power` manually.", call. = FALSE)
}

net <- blockwiseModules(
  datExpr,
  power = sft$powerEstimate,     # soft threshold estimated in step 2
  maxBlockSize = 6000,           # max block size; keeps all genes in one block
  TOMType = "unsigned",          # unsigned network, standard TOM
  deepSplit = 2,                 # tree-cut sensitivity, valid range 0-4
  minModuleSize = 30,
  mergeCutHeight = 0.25,         # module merge threshold; larger => fewer modules
  numericLabels = TRUE,          # TRUE returns numeric labels, FALSE returns colours
  pamRespectsDendro = FALSE,
  saveTOMs = TRUE,
  saveTOMFileBase = "FPKM-TOM",
  # NOTE(review): the documented argument appears to be `loadTOM` (singular);
  # `loadTOMs` is probably absorbed by `...` and has no effect — confirm
  # against the installed WGCNA version before renaming.
  loadTOMs = TRUE,
  verbose = 3
)