R语言实现基因组的可视化

时间:2022-07-28
本文章向大家介绍R语言实现基因组的可视化,主要内容包括其使用实例、应用技巧、基本知识点总结和需要注意事项,具有一定的参考价值,需要的朋友可以参考一下。
基因组的可视化展示大家应该都熟悉,今天给大家看一下在R语言中的一个用来进行基因组可视化的包Sushi。一些基础的理论就不再赘述了,首先我们看下包的安装:
BiocManager::install("Sushi")

接下来我们直接通过实例来看下包中的各种展示方式:

1. 包支持的数据输入类型

library('Sushi')
Sushi_data = data(package = 'Sushi')
data(list = Sushi_data$results[,3])

2. 信号轨迹图,所需的基本参数包括要绘制的数据、染色体和开始和停止位置。

chrom = "chr11"
chromstart = 1650000
chromend = 2350000
plotBedgraph(Sushi_DNaseI.bedgraph,chrom,chromstart,chromend,colorbycol=SushiColors(5))

当然,我们也可以展示各信号的位置信息以及坐标轴值:

labelgenome(chrom,chromstart,chromend,n=4,scale="Mb")
mtext("ReadDepth",side=2,line=1.75,cex=1,font=2)
axis(side=2,las=2,tcl=.2)

如果多个序列同时显示,那就需要设置参数overlay=TRUE,同时如果想多个序列坐标轴范围一致需要用到rescaleoverlay=TRUE

chrom = "chr11"
chromstart = 1955000
chromend = 1960000
plotBedgraph(Sushi_ChIPSeq_CTCF.bedgraph,chrom,chromstart,chromend,transparency=.50,color=SushiColors(2)(2)[1])
plotBedgraph(Sushi_DNaseI.bedgraph,chrom,chromstart,chromend,transparency=.50,color=SushiColors(2)(2)[2],overlay=TRUE,rescaleoverlay=TRUE)
labelgenome(chrom,chromstart,chromend,n=3,scale="Kb")
legend("topright",inset=0.025,legend=c("DNaseI","ChIP-seq(CTCF)"), fill=opaque(SushiColors(2)(2)),border=SushiColors(2)(2),text.font=2,cex=1.0)
#独立展示对比效果需要设置参数flip=TRUE
par(mfrow=c(2,1),mar=c(1,4,1,1))
plotBedgraph(Sushi_ChIPSeq_CTCF.bedgraph,chrom,chromstart,chromend,transparency=.50,color=SushiColors(2)(2)[1])
axis(side=2,las=2,tcl=.2)
mtext("ReadDepth",side=2,line=1.75,cex=1,font=2)
legend("topright",inset=0.025,legend=c("DNaseI","ChIP-seq(CTCF)"),fill=opaque(SushiColors(2)(2)),border=SushiColors(2)(2),text.font=2, cex=1.0)
 
plotBedgraph(Sushi_DNaseI.bedgraph, chrom,chromstart, chromend, transparency=.50, flip=TRUE, color=SushiColors(2)(2)[2])
labelgenome(chrom,chromstart,chromend,side=3,n=3,scale="Kb")
axis(side=2,las=2,tcl=.2,at=pretty(par("yaxp")[c(1,2)]),labels=-1*pretty(par("yaxp")[c(1,2)]))
mtext("ReadDepth",side=2,line=1.75,cex=1,font=2)

3. Hi-C测序结果的染色体互作可视化。Hi-C可以展现出整个染色体all-to-all的互作关系。

chrom = "chr11"
chromstart = 500000
chromend = 5050000
phic = plotHic(Sushi_HiC.matrix,chrom,chromstart, chromend, max_y = 20,zrange=c(0,28), palette=SushiColors(7))
addlegend(phic[[1]], palette=phic[[2]],title="score", side="right", bottominset=0.4, topinset=0,xoffset=-.035, labelside="left", width=0.025, title.offset=0.035)
labelgenome(chrom, chromstart, chromend,n=4, scale="Mb", edgeblankfraction=0.20)

此图和热图想表达的意义是一样的根据颜色的变化可以看出各位点之间相关性打分的大小。当然,还可以对其中的一些细节可以进一步的操作,我们直接看下实例:

chrom = "chr11"
chromstart = 500000
chromend = 5050000
phic =plotHic(Sushi_HiC.matrix,chrom,chromstart,chromend,max_y = 20,zrange=c(0,28),flip=TRUE,palette=topo.colors)
addlegend(phic[[1]],palette=phic[[2]],title="score",side="left",bottominset=0.1,
topinset=0.5,xoffset=-.035,labelside="right",width=0.025,title.offset=0.035)
labelgenome(chrom,chromstart,chromend,side=3,n=4,scale="Mb",edgeblankfraction=0.20)

4. bedpe格式数据的可视化展示

chrom = "chr11"
chromstart = 1650000
chromend = 2350000
pbpe =plotBedpe(Sushi_5C.bedpe,chrom,chromstart,chromend,heights =Sushi_5C.bedpe$score,plottype="loops",colorby=Sushi_5C.bedpe$samplenumber,colorbycol=SushiColors(3))
labelgenome(chrom,chromstart,chromend,n=3,scale="Mb")
legend("topright",inset=0.01,legend=c("K562","HeLa","GM12878"),col=SushiColors(3)(3),pch=19,bty='n',text.font=2)
axis(side=2,las=2,tcl=.2)
mtext("Z-score",side=2,line=1.75,cex=.75,font=2)

上图中的拱形的高度代表了Z-score of the 5C interaction。不同的颜色代表不同的细胞系,线的粗细是一个恒量。

#绘制线型的可视化结果:
chrom = "chr11"
chromstart = 1650000
chromend = 2350000
pbpe =plotBedpe(Sushi_5C.bedpe,chrom,chromstart,chromend,flip=TRUE,plottype="lines",colorby=Sushi_5C.bedpe$score,colorbycol=SushiColors(5))
labelgenome(chrom,chromstart,chromend,side=3,n=3,scale="Mb")
addlegend(pbpe[[1]],palette=pbpe[[2]],title="Z-score",side="right",bottominset=0.05,
topinset=0.05,xoffset=-.035,labelside="right",width=0.025,title.offset=0.045)

上图就是将拱形进行直线化,直接通过直线将两个互作的片段之间进行连线,并通过颜色代表Z-score大小。

5. 对bed格式的chip数据的可视化展示。

chrom = "chr11"
chromstart = 2281200
chromend = 2282200
plotBed(beddata = Sushi_ChIPSeq_pol2.bed,chrom = chrom,chromstart =chromstart,
chromend =chromend,colorby = Sushi_ChIPSeq_pol2.bed$strand,colorbycol= SushiColors(2),row = "auto",wiggle=0.001)
labelgenome(chrom,chromstart,chromend,n=2,scale="Kb")
legend("topright",inset=0,legend=c("reverse","forward"),fill=SushiColors(2)(2),
border=SushiColors(2)(2),text.font=2,cex=0.75)

上图通过颜色将strand进行分别标记

#独立分割srand
chrom = "chr11"
chromstart = 2281200
chromend = 2282200
plotBed(beddata = Sushi_ChIPSeq_pol2.bed,chrom = chrom,chromstart =chromstart,
chromend =chromend,colorby = Sushi_ChIPSeq_pol2.bed$strand,colorbycol= SushiColors(2),row = "auto",wiggle=0.001,splitstrand=TRUE)
labelgenome(chrom,chromstart,chromend,n=2,scale="Kb")
legend("topright",inset=0,legend=c("reverse","forward"),fill=SushiColors(2)(2),
border=SushiColors(2)(2),text.font=2,cex=0.75)
#多样本数据的比较,点图和矩形图
Sushi_ChIPSeq_severalfactors.bed$color=maptocolors(Sushi_ChIPSeq_severalfactors.bed$row,col=SushiColors(6))
chrom = "chr15"
chromstart = 72800000
chromend = 73100000
plotBed(beddata =Sushi_ChIPSeq_severalfactors.bed,chrom = chrom,chromstart = chromstart,chromend=chromend,rownumber = Sushi_ChIPSeq_severalfactors.bed$row, type ="circles",color=Sushi_ChIPSeq_severalfactors.bed$color,row="given",plotbg="grey95",rowlabels=unique(Sushi_ChIPSeq_severalfactors.bed$name),rowlabelcol=unique(Sushi_ChIPSeq_severalfactors.bed$color),rowlabelcex=0.75)
labelgenome(chrom,chromstart,chromend,n=3,scale="Mb")
mtext("ChIP-seq",side=3,adj=-0.065,line=0.5,font=2)
plotBed(beddata =Sushi_ChIPSeq_severalfactors.bed,chrom = chrom,chromstart = chromstart,chromend=chromend,rownumber = Sushi_ChIPSeq_severalfactors.bed$row, type ="region",color=Sushi_ChIPSeq_severalfactors.bed$color,row="given",plotbg="grey95",rowlabels=unique(Sushi_ChIPSeq_severalfactors.bed$name),rowlabelcol=unique(Sushi_ChIPSeq_severalfactors.bed$color),rowlabelcex=0.75)
labelgenome(chrom,chromstart,chromend,n=3,scale="Mb")
mtext("ChIP-seq",side=3,adj=-0.065,line=0.5,font=2)

我们也可以通过位置信息获得我们想要的基因信息:

chrom = "chr15"
chromstart = 60000000
chromend = 80000000
chrom_biomart =gsub("chr","",chrom)
mart=useMart(host='may2009.archive.ensembl.org',biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl')
geneinfobed = getBM(attributes =c("chromosome_name","start_position","end_position"),filters= c("chromosome_name","start","end"), values=list(chrom_biomart,chromstart,chromend),mart=mart)
geneinfobed[,1] =paste("chr",geneinfobed[,1],sep="")

那么接下来我们就可以绘制基因密度图,所谓基因密度是指一个基因中所含碱基的相对数量,相对数量越大,基因密度越大。比较难懂,通俗点就是在所提供的序列长度中包含的基因越多,那么基因密度越大。

plotBed(beddata =geneinfobed[!duplicated(geneinfobed),],chrom = chrom,chromstart =chromstart,chromend =chromend,row='supplied',palettes = list(SushiColors(7)),type = "density")
labelgenome(chrom, chromstart, chromend,n=4,scale="Mb",edgeblankfraction=0.10)
mtext("Gene Density",side=3,adj=0,line=0.20,font=2)

6. 曼哈顿图绘制

plotManhattan(bedfile=Sushi_GWAS.bed,pvalues=Sushi_GWAS.bed[,5],col=SushiColors(6),genome=Sushi_hg18_genome,cex=0.75)
labelgenome(genome=Sushi_hg18_genome,n=4,scale="Mb",edgeblankfraction=0.20,cex.axis=.5)
axis(side=2,las=2,tcl=.2)
mtext("log10(P)",side=2,line=1.75,cex=1,font=2)
mtext("chromosome",side=1,line=1.75,cex=1,font=2)

7. 基因结构的绘制

chrom = "chr15"
chromstart = 72998000
chromend = 73020000
pg =plotGenes(Sushi_genes.bed,chrom,chromstart,chromend ,types=Sushi_genes.bed$type,maxrows=1,bheight=0.2,plotgenetype="arrow",bentline=FALSE,labeloffset=.4,fontsize=1.2,arrowlength= 0.025,labeltext=TRUE)
labelgenome( chrom,chromstart,chromend,n=3,scale="Mb")
#RNA结构的绘制


chrom = "chr15"
chromstart = 72965000
chromend = 72990000
pg =plotGenes(Sushi_transcripts.bed,chrom,chromstart,chromend ,types =Sushi_transcripts.bed$type,colorby=log10(Sushi_transcripts.bed$score+0.001),colorbycol=SushiColors(5),colorbyrange=c(0,1.0),labeltext=TRUE,maxrows=50,height=0.4,plotgenetype="box")
labelgenome( chrom,chromstart,chromend,n=3,scale="Mb")
addlegend(pg[[1]],palette=pg[[2]],title="log10(FPKM)",side="right",bottominset=0.4,topinset=0,xoffset=-.035,labelside="left",width=0.025,title.offset=0.055)

8. 局部放大显示

layout(matrix(c(1,1,2,3),2, 2, byrow =TRUE))
par(mar=c(3,4,1,1))
 #基础架构
chrom = "chr11"
chromstart = 1900000
chromend = 2350000
plotBedgraph(Sushi_DNaseI.bedgraph,chrom,chromstart=chromstart,chromend=chromend,colorbycol=SushiColors(5))
labelgenome(chrom,chromstart=chromstart,chromend=chromend,n=4,scale="Mb")
mtext("ReadDepth",side=2,line=1.75,cex=1,font=2)
axis(side=2,las=2,tcl=.2)
 
#放大的位置
zoomregion1 = c(1955000,1960000)
zoomregion2 = c(2279000,2284000)
zoomsregion(zoomregion1,extend=c(0.01,0.13),wideextend=0.05,offsets=c(0,0.580))
zoomsregion(zoomregion2,extend=c(0.01,0.13),wideextend=0.05,offsets=c(0.580,0))


#放大区域数据可视化
plotBedgraph(Sushi_DNaseI.bedgraph,chrom,chromstart=zoomregion1[1],chromend=zoomregion1[2],colorbycol=SushiColors(5))
labelgenome(chrom,chromstart=zoomregion1[1],chromend=zoomregion1[2],n=4,scale="Kb",edgeblankfraction=0.2,cex.axis=.75)
zoombox()
mtext("ReadDepth",side=2,line=1.75,cex=1,font=2)
axis(side=2,las=2,tcl=.2)
 
plotBedgraph(Sushi_DNaseI.bedgraph,chrom,chromstart=zoomregion2[1],chromend=zoomregion2[2],colorbycol=SushiColors(5))
labelgenome(chrom,chromstart=zoomregion2[1],chromend=zoomregion2[2],n=4,scale="Kb",edgeblankfraction=0.2,cex.axis=.75)
zoombox()
mtext("ReadDepth",side=2,line=1.75,cex=1,font=2)
axis(side=2,las=2,tcl=.2)

欢迎大家互相学习交流!