最美情侣中文字幕电影,在线麻豆精品传媒,在线网站高清黄,久久黄色视频

歡迎光臨散文網(wǎng) 會(huì)員登陸 & 注冊(cè)

高分生信SCI-WGCNA分析復(fù)現(xiàn)

2023-07-18 15:47 作者:爾云間  | 我要投稿

今天小云給小伙伴帶來(lái)的分享是啥呢?

今天小云將復(fù)現(xiàn)一篇8+生信文章中的WGCNA分析,通過(guò)該推文將掌握如何利用自己的數(shù)據(jù)進(jìn)行WGCNA分析,來(lái)進(jìn)行與表型或者疾病相關(guān)候選基因的挖掘,小云覺(jué)得WGCNA是一個(gè)非常不錯(cuò)的候選基因挖掘的方法,值得小伙伴們認(rèn)真學(xué)習(xí),接下來(lái)跟著小云開(kāi)始今天的分享吧!

1.WGCNA介紹

在開(kāi)始分析之前,小云想為小伙伴簡(jiǎn)單介紹一下WGCNA,WGCNA是一種用于基因共表達(dá)的方法,它可以將高通量基因表達(dá)數(shù)據(jù)轉(zhuǎn)化為一個(gè)共表達(dá)網(wǎng)絡(luò),從而發(fā)現(xiàn)具有相似表達(dá)模式的基因模塊,以及研究這些模塊與表型之間的相關(guān)性,最終挖掘與表型相關(guān)的候選基因。這就是小云對(duì)WGCNA分析原理的簡(jiǎn)單介紹,是不是通俗易懂奧!其實(shí)WGCNA分析很簡(jiǎn)單,小伙伴不要因?yàn)榉治霾襟E多而不敢嘗試,只需要輸入基因表達(dá)矩陣和對(duì)應(yīng)樣本表型數(shù)據(jù)文件,就可以完成分析,有需要的小伙伴可以跟著小云開(kāi)始今天的實(shí)操。

2.準(zhǔn)備需要的R包

#安裝需要的R包

BiocManager::install("WGCNA")

#加載需要的R包

library(WGCNA)

3.WGCNA分析

#讀取表達(dá)矩陣,行名為基因,列名為樣本信息

expr<-read.table("combined.expr.txt",header=T,sep="\t")

#行列轉(zhuǎn)置

mydata<-expr

expr2<-data.frame(t(mydata))

#確定expr2列名

colnames(expr2)<-rownames(mydata)

#確定expr2行名

rownames(expr2)<-colnames(mydata)

#基因過(guò)濾

dataExpr1<-expr2

gsg=goodSamplesGenes(dataExpr1,verbose=3);

gsg$allOK

if (!gsg$allOK){

??# Optionally, print the gene and sample names that were removed:

??if (sum(!gsg$goodGenes)>0)

????printFlush(paste("Removing genes:",

?????????????????????paste(names(dataExpr)[!gsg$goodGenes], collapse = ",")));

??if (sum(!gsg$goodSamples)>0)

????printFlush(paste("Removing samples:",

?????????????????????paste(rownames(dataExpr)[!gsg$goodSamples], collapse = ",")));

??# Remove the offending genes and samples from the data:

??dataExpr1 = dataExpr1[gsg$goodSamples, gsg$goodGenes]

}

#基因數(shù)目

nGenes = ncol(dataExpr1)

#樣本數(shù)目

nSamples = nrow(dataExpr1)

#導(dǎo)入表型數(shù)據(jù),第一列為樣本信息,其他列為表型數(shù)據(jù),需要注意的是表型數(shù)據(jù)要與樣本數(shù)據(jù)一一對(duì)應(yīng)

traitData=read.table("TraitData.txt",header=T,sep="\t",row.names=1)

#提取表達(dá)矩陣樣本名

fpkmSamples<-rownames(dataExpr1)

#提取表型文件樣本名

traitSamples<-rownames(traitData)

#表達(dá)矩陣樣本名順序匹配表型文件樣本名

traitRows<-match(fpkmSamples,traitSamples)

#獲得匹配好樣本順序的表型文件

dataTraits<-traitData[traitRows,]

#繪制樹(shù)+表型熱圖

sampleTree2<-hclust(dist(dataExpr1),method="average")

traitColors<-numbers2colors(dataTraits,signed=FALSE)

png("sample-subtype-cluster.png",width = 800,height = 600)

plotDendroAndColors(sampleTree2,traitColors,groupLabels=names(dataTraits),main="Sample dendrogram and trait heatmap",

??????????cex.colorLabels=1.5,cex.dendroLabels=1,cex.rowText=2)

dev.off()

#計(jì)算合適的power值,并繪制Power值曲線圖

powers = c(c(1:10), seq(from = 12, to=20, by=2))

sft = pickSoftThreshold(dataExpr1, powerVector = powers, verbose = 5)

#設(shè)置網(wǎng)絡(luò)構(gòu)建參數(shù)選擇范圍,計(jì)算無(wú)尺度分布拓?fù)渚仃?/p>

png("step2-beta-value.png",width = 800,height = 600)

par(mfrow = c(1,2));

cex1 = 0.9;

??# Scale-free topology fit index as a function of the soft-thresholding power

plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],

???????xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",

???????main = paste("Scale independence"));

text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],

???????labels=powers,cex=cex1,col="red");

??# this line corresponds to using an R^2 cut-off of h

??abline(h=0.90,col="red")

??# Mean connectivity as a function of the soft-thresholding power

??plot(sft$fitIndices[,1], sft$fitIndices[,5],

???????xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",

???????main = paste("Mean connectivity"))

??text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")

dev.off()

#一步法構(gòu)建共表達(dá)網(wǎng)絡(luò)

##需要調(diào)用WGCNA包自帶的cor函數(shù),不然會(huì)發(fā)生報(bào)錯(cuò)奧!

cor<-WGCNA::cor

##在進(jìn)行共表達(dá)網(wǎng)絡(luò)構(gòu)建時(shí),power值的選擇非常重要,最影響結(jié)果的一個(gè)參數(shù),需要經(jīng)過(guò)多次嘗試,才能找到最適合的。

net = blockwiseModules(dataExpr1, power = 7, maxBlockSize = nGenes,

???????????????????????TOMType ='unsigned', minModuleSize = 30,

???????????????????????reassignThreshold = 0, mergeCutHeight = 0.25,

???????????????????????numericLabels = TRUE, pamRespectsDendro = FALSE,

???????????????????????saveTOMs=TRUE, saveTOMFileBase = "drought",

???????????????????????verbose = 3)

table(net$colors)

cor<-stats::cor

#繪制基因聚類(lèi)樹(shù)和模塊顏色組合

# Convert labels to colors for plotting

moduleLabels = net$colors

moduleColors = labels2colors(moduleLabels)

# Plot the dendrogram and the module colors underneath

png("step4-genes-modules.png",width = 800,height = 600)

plotDendroAndColors(net$dendrograms[[1]], moduleColors[net$blockGenes[[1]]],

????????????????????"Module colors",

????????????????????dendroLabels = FALSE, hang = 0.03,

????????????????????addGuide = TRUE, guideHang = 0.05)

dev.off()

#計(jì)算模塊與性狀間的相關(guān)性及繪制相關(guān)性熱圖

nGenes = ncol(dataExpr1)

nSamples = nrow(dataExpr1)

design=read.table("TraitData.txt", sep = '\t', header = T, row.names = 1)

# Recalculate MEs with color labels

MEs0 = moduleEigengenes(dataExpr1, moduleColors)$eigengenes

##不同顏色的模塊的ME值矩 (樣本vs模塊)

MEs = orderMEs(MEs0);

moduleTraitCor = cor(MEs, design , use = "p");

moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples)

# Will display correlations and their p-values

textMatrix = paste(signif(moduleTraitCor, 2), "\n(",

?????????????????????signif(moduleTraitPvalue, 1), ")", sep = "");

dim(textMatrix) = dim(moduleTraitCor)

png("step5-Module-trait-relationships.png",width = 800,height = 1200,res = 120)

par(mar = c(6, 8.5, 3, 3));

# Display the correlation values within a heatmap plot

labeledHeatmap(Matrix = moduleTraitCor,

?????????????????xLabels = colnames(design),

?????????????????yLabels = names(MEs),

?????????????????ySymbols = names(MEs),

?????????????????colorLabels = FALSE,

?????????????????colors = greenWhiteRed(50),

?????????????????textMatrix = textMatrix,

?????????????????setStdMargins = FALSE,

?????????????????cex.text = 0.5,

?????????????????zlim = c(-1,1),

?????????????????main = paste("Module-trait relationships"))

dev.off()

#計(jì)算MM值和GS值并繪圖

##切割,從第三個(gè)字符開(kāi)始保存

modNames = substring(names(MEs), 3)

geneModuleMembership = as.data.frame(cor(dataExpr1, MEs, use = "p"));

## 算出每個(gè)模塊跟基因的皮爾森相關(guān)系數(shù)矩

## MEs是每個(gè)模塊在每個(gè)樣本里面皿

## dataExpr1是每個(gè)基因在每個(gè)樣本的表達(dá)量

MMPvalue = as.data.frame(

????corPvalueStudent(as.matrix(geneModuleMembership), nSamples) ##計(jì)算MM值對(duì)應(yīng)的P值

????);

names(geneModuleMembership) = paste("MM", modNames, sep=""); ##給MM對(duì)象統(tǒng)一賦名

names(MMPvalue) = paste("p.MM", modNames, sep=""); ##給MMPvalue對(duì)象統(tǒng)一賦名

##計(jì)算基因與每個(gè)性狀的顯著性(相關(guān)性)及pvalue值

geneTraitSignificance = as.data.frame(cor(dataExpr1, design, use = "p"));

GSPvalue = as.data.frame(

????corPvalueStudent(as.matrix(geneTraitSignificance), nSamples) ##計(jì)算GS值對(duì)應(yīng)的pvalue值

????);

names(geneTraitSignificance) = paste("GS.", colnames(design), sep="");

names(GSPvalue) = paste("p.GS.", colnames(design), sep="");

module = "brown"

column = match(module, modNames); ?##在所有模塊中匹配選擇的模塊,返回所在的位置

moduleGenes = moduleColors==module; ?##==匹配,匹配上的返回True,否則返回false

png("step6-Module_membership-gene_significance.png",width = 800,height = 600)

par(mfrow = c(1,1)); ##設(shè)定輸出圖片行列數(shù)量,(1_1)代表行一個(gè),列一丿

verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]), ## 繪制MM和GS散點(diǎn)囿

?????????????????????abs(geneTraitSignificance[moduleGenes, 1]),

?????????????????????xlab = paste("Module Membership in", module, "module"),

?????????????????????ylab = "Gene significance for Basal",

?????????????????????main = paste("Module membership vs. gene significance\n"),

?????????????????????cex.main = 1.2, cex.lab = 1.2, cex.axis = 1.2, col = module)

dev.off()

#繪制TOM熱圖+模塊性狀組合圖

geneTree = net$dendrograms[[1]]; ?##提取基因聚類(lèi)

dissTOM = 1-TOMsimilarityFromExpr(dataExpr1, power = 7); ?##計(jì)算TOM距離

plotTOM = dissTOM^7; ##通過(guò)7次方處理進(jìn)行標(biāo)準(zhǔn)化,降低基因間的誤差

diag(plotTOM) = NA; ?##替換斜對(duì)角矩陣中的內(nèi)容為NA

TOMplot(plotTOM, geneTree, moduleColors, main = "Network heatmap plot, all genes")

nSelect = 400 ?##挑選需要提取的基因

set.seed(10); ?##設(shè)置隨機(jī)種子數(shù),保證選取的隨機(jī)性

select = sample(nGenes, size = nSelect); ?##仿5000個(gè)基因中隨機(jī)叿400丿

selectTOM = dissTOM[select, select]; ?##提取挑出來(lái)的基因的TOM距離

# There’s no simple way of restricting a clustering tree to a subset of genes, so we must re-cluster.

selectTree = hclust(as.dist(selectTOM), method = "average") ?##對(duì)挑出來(lái)對(duì)基因TOM距離重新聚類(lèi)

selectColors = moduleColors[select];

# Open a graphical window

sizeGrWindow(9,9)

# Taking the dissimilarity to a power, say 10, makes the plot more informative by effectively changing

# the color palette; setting the diagonal to NA also improves the clarity of the plot

plotDiss = selectTOM^7; ##對(duì)挑選出對(duì)基因TOM距離7次方處理

diag(plotDiss) = NA; ##替換斜對(duì)角矩陣中的內(nèi)容為NA

png("step7-Network-heatmap.png",width = 800,height = 600)

TOMplot(plotDiss, selectTree, selectColors, ?##繪制TOM圖

??????????main = "Network heatmap plot, selected genes")

dev.off()

# 模塊性狀組合圖

RA = as.data.frame(design[,2]); ##提取特定性狀

names(RA) = "RA"

# Add the weight to existing module eigengenes

MET = orderMEs(cbind(MEs, RA))

# Plot the relationships among the eigengenes and the trait

sizeGrWindow(5,7.5);

par(cex = 0.9)

png("step7-Eigengene-dendrogram.png",width = 800,height = 600)

plotEigengeneNetworks(MET, "", ??##繪制模塊聚類(lèi)圖和熱圖

????????????????????????marDendro =c(0,5,1,5), ?##樹(shù)類(lèi)圖區(qū)間大小誰(shuí)宿

????????????????????????marHeatmap = c(5,6,1,2), cex.lab = 0.8, ##樹(shù)類(lèi)圖區(qū)間大小誰(shuí)宿

????????????????????????xLabelsAngle = 90) ?##軸鮮艿90庿

dev.off()

#導(dǎo)出單個(gè)模塊的網(wǎng)絡(luò)結(jié)果

TOM = TOMsimilarityFromExpr(dataExpr1, power = 7); ?##需要較長(zhǎng)時(shí)間

# 選擇brown模塊

module = "brown";

# Select module probes

probes = colnames(dataExpr1)

inModule = (moduleColors==module);

## 提取指定模塊的基因名

modProbes = probes[inModule];

modTOM = TOM[inModule, inModule];

dimnames(modTOM) = list(modProbes, modProbes)

## 模塊對(duì)應(yīng)的基因關(guān)系矩

cyt = exportNetworkToCytoscape( ?##導(dǎo)出單個(gè)模塊的Cytoscape輸入文件

????modTOM,

????edgeFile = paste("CytoscapeInput-edges-", paste(module, collapse="-"), ".txt", sep=""), ##輸出邊文件名

????nodeFile = paste("CytoscapeInput-nodes-", paste(module, collapse="-"), ".txt", sep=""), ##輸出點(diǎn)文件名

????weighted = TRUE,

????threshold = 0.02,

????nodeNames = modProbes,

????nodeAttr = moduleColors[inModule]

??)

CytoscapeInput-edges-brown.txt,邊文件

CytoscapeInput-nodes-brown.txt,點(diǎn)文件

最終小云成功完成了WGCNA分析,基本達(dá)到了復(fù)現(xiàn),今天小云的分享就到這里啦!

如果小伙伴有其他數(shù)據(jù)分析需求,可以嘗試本公司新開(kāi)發(fā)的生信分析小工具云平臺(tái),零代碼完成分析,非常方便奧,

云平臺(tái)網(wǎng)址為:http://www.biocloudservice.com/home.html,

包括了WGCNA分析(http://www.biocloudservice.com/336/336.php),

GEO數(shù)據(jù)下載(http://www.biocloudservice.com/371/371.php)等小工具,

歡迎大家和小云一起討論學(xué)習(xí)哈?。。?!








高分生信SCI-WGCNA分析復(fù)現(xiàn)的評(píng)論 (共 條)

分享到微博請(qǐng)遵守國(guó)家法律
甘肃省| 平陆县| 安远县| 镇宁| 特克斯县| 青岛市| 宜春市| 九江县| 云霄县| 泰兴市| 阿荣旗| 柏乡县| 藁城市| 万载县| 陈巴尔虎旗| 会昌县| 霍州市| 崇礼县| 横山县| 唐河县| 金湖县| 彩票| 勐海县| 江陵县| 苍梧县| 金堂县| 阜阳市| 巴楚县| 武穴市| 上栗县| 梧州市| 江口县| 垣曲县| 赤峰市| 百色市| 晋江市| 筠连县| 奈曼旗| 额济纳旗| 裕民县| 盘锦市|