最美情侣中文字幕电影,在线麻豆精品传媒,在线网站高清黄,久久黄色视频

歡迎光臨散文網(wǎng) 會員登陸 & 注冊

「生信技能樹」三陰性乳腺癌表達(dá)矩陣探索

2021-11-14 11:09 作者:zlzrzp  | 我要投稿

操作數(shù)據(jù)與視頻案例不一樣,已根據(jù)實(shí)際情況改動,請注意契合度。


library("GEOquery")

load("GSE55235.Rdata")


a=gset[[1]]

dat=exprs(a)

pd=pData(a)

#這里跟案例操作不一樣,因?yàn)槲矣玫腉SE里有三種類型數(shù)據(jù),所以我選取了其中兩種


RA=rownames(pd[grepl('disease state: rheumatoid arthritis',as.character(pd$characteristics_ch1)),])

NOR=rownames(pd[grepl('disease state: healthy control',as.character(pd$characteristics_ch1)),])

dat=dat[,c(RA,NOR)]

group_list=c(rep("RA",times=10),rep("NOR",times=10))

save(dat,group_list,file="expression and pd")


rm(list = ls())

#PCA

load(file = "expression and pd")

dat=t(dat)

dat=as.data.frame(dat)

dat=cbind(dat,group_list)??

library("ggplot2")

library("factoextra")

library("FactoMineR")

#the the variable group_list (index =22284)is removed?

dat.pca <- PCA(dat[,-22284],graph=FALSE)

fviz_pca_ind(dat.pca,

???????geom.ind = "point",

???????col.ind = dat$group_list,

???????palette = c("#00AFBB","#E7B800"),

???????addEllipses = TRUE,

???????legend.title = "Groups"

)



rm(list = ls())

# heatmap

load(file = "expression and pd")

cg=names(tail(sort(apply(dat,1,sd)),20))

library(pheatmap)

n=t(scale(t(dat[cg,])))

#讓圖區(qū)別明顯

n[n>2]=2

n[n<-2]=-2

pheatmap(n,show_rownames = F, show_colnames = F)

#加分組信息

ac=data.frame(g=group_list)

row.names(ac)=colnames(n)

pheatmap(n,annotation_col = ac)


#boxplot

rm(list = ls())

load(file = "expression and pd")

library("ggplot2")

library("ggpubr")

bp=function(g){library("ggplot2")

?library("ggpubr")

?df=data.frame(gene=g,stage=group_list)

?p <- ggboxplot(df,x="stage",y="gene",

????????color="stage",palette="jco",

????????add="jitter")

?#add p value

?p + stat_compare_means()

}

bp(dat[1,])

bp(dat[2,])

rm(list = ls())

#差異分析

load(file = "expression and pd")


library("limma")

design=model.matrix(~factor(group_list))

fit=lmFit(dat,design)

fit=eBayes(fit)

options(digits = 4)

topTable(fit,coef=2,adjust="BH")

deg=topTable(fit,coef=2,adjust="BH",number = Inf)

save(deg,file = "P value?")


rm(list = ls())

#基因ID轉(zhuǎn)換

load(file = "P value")

deg$probe_id=rownames(deg)

library("hgu133plus2.db")

ids=toTable(hgu133plus2SYMBOL)


deg=merge(deg,ids,by="probe_id")

save(deg,file = "symbol")


#繪制火山圖

if(T){

?nrDEG=deg

?head(nrDEG)

?attach(nrDEG)

?plot(logFC,-log10(P.Value))

?library("ggplot2")

?library("ggpubr")

?df=nrDEG

?df$v=-log10(P.Value)

?ggscatter(df,x="logFC",y="v",size=0.5)

??

?df$g=ifelse(df$P.Value>0.01,"stable",

???????ifelse(df$logFC>2,"up",

???????????ifelse(df$logFC<-2,"down","stable"))

?)

?table(df$g)

?df$name=rownames(df)

?ggscatter(df,x="logFC",y="v",size=0.5,color="g")

?ggscatter(df,x="logFC",y="v",color="g",size=0.5,

??????label="symbol",repel=T,

??????label.select=c("MMP3","MT1X"),

??????palette=c("#00AFBB","#E78800","#FC4E07"))

??

?ggscatter(df,x="AveExpr",y="logFC",size=0.2)

?df$p_c=ifelse(df$P.Value<0.001,"p<0.001",

????????ifelse(df$P.Value<0.01,"0.001<p<0.01","p>0.01"))

?table(df$p_c)

?ggscatter(df,x="AveExpr",y="logFC",color="p_c",size=0.2,

??????palette=c("green","red","black"))

??

}

#繪制熱圖


if(T){

?#熱圖需要矩陣數(shù)據(jù)

?load(file = "expression and pd")

?load(file = "P value")

?dat[1:4,1:4]

?#取探針名

?x=deg$logFC

?deg$probe_id=rownames(deg)

?names(x)=deg$probe_id

?#根據(jù)logFC大小來取探針名;head前多少;tail后多少

?cg=c(names(head(sort(x),100)),

????names(tail(sort(x),100)))

?library(pheatmap)

?pheatmap(dat[cg,],show_colnames=F,show_rownames=F)

?#歸一化后畫圖

?n=t(scale(t(dat[cg,])))

?n[n>2]=2

?n[n<-2]=-2

?n[1:4,1:4]

?pheatmap(n,show_colnames=F,show_rownames=F)

?#加回group_list

?ac=data.frame(g=group_list)

?rownames(ac)=colnames(n)

?pheatmap(n,show_colnames=F,

??????show_rownames=F,

??????#不排序就是F,默認(rèn)排序T

??????cluster_cols=F,

??????annotation_col=ac)

}



rm(list = ls())

#注釋

load(file = "symbol")

head(deg)

##這里出大問題,我的logFC運(yùn)行代碼后全部變成閾值了,待解決;然后我繞了一下路采用輔助列恢復(fù)原值

logFC_t=648.43

logFC_true=deg$logFC


deg=cbind(deg,logFC_true)

deg$g=ifelse(deg$P.Value>0.01,"stable",

???????ifelse( deg$logFC >logFC_t,"up",

??????????ifelse( deg$logFC<-logFC_t,"down","stable"))

)

table(deg$g)

deg$logFC=deg$logFC_true

deg=deg[,-9]


#有些包不需要加載,同時關(guān)系到下面的一些函數(shù),發(fā)現(xiàn)函數(shù)找不到時嘗試重新加載包

library("ggplot2")

library("clusterProfiler")

library("AnnotationDbi")

library("stats4")

library("BiocGenerics")

library("org.Hs.eg.db")

df <- bitr(unique(deg$symbol),fromType="SYMBOL",

??????toType=c("ENTREZID"),

??????OrgDb=org.Hs.eg.db)


head(df)

head(deg)

deg$SYMBOL=rownames(deg)

deg=merge(deg,df,by.y="SYMBOL",by.x="symbol")

head(deg)


gene_up=deg[deg$g=="up","ENTREZID"]

gene_down=deg[deg$g=="down","ENTREZID"]

gene_diff=c(gene_up,gene_down)

#

gene_all=as.character(deg[,"ENTREZID"])

data(geneList,package = "DOSE")

head(geneList)

#這里兩個圖還沒有理解

boxplot(geneList)

boxplot(deg$logFC)


geneList=deg$logFC

names(geneList)=deg$ENTREZID

geneList=sort(geneList,decreasing = T)


## KEGG pathway analysis

if(T){

?### over=representation test

?kk.up <- enrichKEGG(gene = gene_up,

???????????organism = "hsa",

???????????universe = gene_all,

???????????pvalueCutoff = 0.9,

???????????qvalueCutoff = 0.9)

?head(kk.up)[,1:6]

?kk.down <- enrichKEGG(gene = gene_down,

????????????organism = "hsa",

????????????universe = gene_all,

????????????pvalueCutoff = 0.05)

?head(kk.down)[,1:6]

?kk.diff <- enrichKEGG(gene = gene_diff,

????????????organism = "hsa",

????????????pvalueCutoff = 0.05)

?head(kk.diff)[,1:6]

??

?kegg_diff_dt <- as.data.frame(kk.diff)

?kegg_down_dt <- as.data.frame(kk.down)

?kegg_up_dt <- as.data.frame(kk.up)

?down_kegg <- kegg_down_dt[kegg_down_dt$pvalue<0.05,];down_kegg$group=-1

?up_kegg <- kegg_up_dt[kegg_up_dt$pvalue<0.05,];up_kegg$group=1

?#這里的自定義函數(shù)指路https://www.jianshu.com/p/0dd8f42a92ba

?

?source("functions.R")

?g_kegg=kegg_plot(up_kegg,down_kegg)

?print(g_kegg)

??

?ggsave(g_kegg,filename="kegg_up_down.png")

??

?#GSEA

?#這里報(bào)錯不用管

?kk_gse <- gseKEGG(geneList = geneList,

??????????organism = "hsa",

??????????verbose = FALSE)

?head(kk_gse)[,1:6]

?gseaplot(kk_gse,geneSetID = rownames(kk_gse[1,]))

??

?down_kegg <- kk_gse[kk_gse$pvalue<0.05&kk_gse$enrichmentScore<0,];down_kegg$group

?up_kegg <- kk_gse[kk_gse$pvalue<0.05&kk_gse$enrichmentScore<0,];up_kegg$group

??

?source("functions.R")

?#這里有錯,待解決:Error in `$<-.data.frame`(`*tmp*`, "pvalue", value = numeric(0)) : 替換數(shù)據(jù)里有0行,但數(shù)據(jù)有4


?g_kegg=kegg_plot(up_kegg,down_kegg)

?print(g_kegg)

?ggsave(g_kegg,filename = "kegg_up_down_gsea.png")

??

}


#GO databases analysis

{

g_list=list(gene_up=gene_up,

??????gene_down=gene_down,

??????gene_diff=gene_diff)

if(F){

?go_enrich_results <- lapply(g_list,function(gene){

??lapply(c("BP","MF","CC"),function(ont){

???cat(paste("Now process",ont))

???ego <- enrichGO(gene = gene,

???????????universe = gene_all,

???????????OrgDb = org.Hs.eg.db,

???????????ont = ont,

???????????pAdjustMethod = "BH",

???????????pvalueCutoff = 0.99,

???????????qvalueCutoff = 0.99,

???????????readable = TRUE)

???print(head(ego))

???return(ego)

??})

?})

}

}


#根據(jù)需要畫圖

rm(list = ls())

options(stringsAsFactors = F)

load(file = "expression and pd")

dat[1:4,1:4]

library("hgu133plus2.db")

p2s=toTable(hgu133plus2SYMBOL)


a=p2s$symbol %in% c()

np=p2s[a,1]

ng=p2s[a,2]

dat[np,1:4]

x=dat[np,]

rownames(x)=paste(ng,np,sep = ":")

library('pheatmap')

tmp=data.frame(group=group_list)

rownames(tmp)=colnames(x)

pheatmap(x,annotation_col=tmp)


「生信技能樹」三陰性乳腺癌表達(dá)矩陣探索的評論 (共 條)

分享到微博請遵守國家法律
江华| 治县。| 伊春市| 三亚市| 东乡| 台东市| 富蕴县| 聂荣县| 广丰县| 临武县| 夹江县| 五原县| 邵阳县| 文成县| 威信县| 班戈县| 金山区| 龙井市| 安阳市| 卢龙县| 崇信县| 南召县| 甘德县| 揭阳市| 屯门区| 会泽县| 高邮市| 泊头市| 望城县| 鹤壁市| 兴文县| 宜宾市| 泽州县| 临安市| 克山县| 堆龙德庆县| 开鲁县| 富阳市| 清水县| 兰州市| 清远市|