Skip to content

Commit f3b0ad7

Browse files
committed
2 parents 5baf753 + f8d78b3 commit f3b0ad7

File tree

1 file changed

+69
-45
lines changed

1 file changed

+69
-45
lines changed

methylation_region_plot.R

Lines changed: 69 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,15 @@ load_bsseq <- function(cytoreports) {
1919
samps=gsub("cyto.txt.*", "bisulfite", samps)
2020

2121
samps=toupper(samps)
22+
23+
methobj=read.bismark(files=cytoreports, sampleNames=samps, rmZeroCov=T, fileType="cytosineReport")
2224

23-
methobj=foreach(i=1:length(cytoreports), .combine=combine) %do% {
24-
single=read.bismark(files=cytoreports[i], sampleNames=samps[i], rmZeroCov=T, fileType="cytosineReport")
25-
##Bug workaround
26-
sampleNames(single)=samps[i]
27-
single
28-
}
25+
#methobj=foreach(i=1:length(cytoreports), .combine=combine) %do% {
26+
# single=read.bismark(files=cytoreports[i], sampleNames=samps[i], rmZeroCov=T, fileType="cytosineReport")
27+
# ##Bug workaround
28+
# sampleNames(single)=samps[i]
29+
# single
30+
#}
2931

3032
}
3133

@@ -79,9 +81,9 @@ region.sort.filter <- function(methobj, reg, cthresh=10, diff.thresh=.2, regout=
7981
write.table(reg.bedout, file=gzfile(regout), quote=F, sep="\t", row.names=F, col.names=F)
8082
}
8183

82-
83-
corr.plot <- function(methobj, cthresh=5, plotname="corr.pdf") {
84-
##Add colors for level of coverage
84+
corr.plot <- function(methobj, cthresh=10, plotname="corr.pdf") {
85+
library(ggExtra)
86+
8587

8688
##Assuming two samples only in bsseq
8789
methobj=methobj[,1:2]
@@ -97,33 +99,24 @@ corr.plot <- function(methobj, cthresh=5, plotname="corr.pdf") {
9799
##Get coverage of lesser (probably nanopore) data
98100
enough.meth$mincov=apply(basecov[enough.cov,], 1, min)
99101

100-
##Bin to threshold, threshold+5, threshold +10 (5, 10, 15)
101-
plus0=enough.meth$mincov>=(cthresh)&enough.meth$mincov<(cthresh+5)
102-
plus5=enough.meth$mincov>=(cthresh+5)&enough.meth$mincov<(cthresh+10)
103-
plus10=enough.meth$mincov>=(cthresh+10)
104-
105102
##Get correlation value
106-
corval=c(cor(enough.meth[plus0,1], enough.meth[plus0,2]),
107-
cor(enough.meth[plus5,1], enough.meth[plus5,2]),
108-
cor(enough.meth[plus10,1], enough.meth[plus10,2]))
103+
corval=cor(enough.meth[,1], enough.meth[,2])
109104

110-
cov.levels=c(paste0(cthresh, "-", cthresh+5, ": R=",format(corval[1], digits=2)),
111-
paste0(cthresh+5, "-", cthresh+10, ": R=",format(corval[2], digits=2)),
112-
paste0(">", cthresh+10, "X", ": R=", format(corval[3], digits=2)))
113-
114-
##Plot with different colors and titles for coverage
115-
enough.meth$covcol=cov.levels[1]
116-
enough.meth$covcol[plus5]=cov.levels[2]
117-
enough.meth$covcol[plus10]=cov.levels[3]
118-
enough.meth$covcol=factor(enough.meth$covcol, levels=cov.levels)
105+
cor.title=paste0(">", cthresh, "X: R=",format(corval, digits=2))
106+
107+
##Difference
108+
enough.meth$mdiff=enough.meth[,1]-enough.meth[,2]
119109

120-
pdf(plotname)
121-
print(ggplot(enough.meth, aes_string(x=colnames(enough.meth)[1], y=colnames(enough.meth)[2], color="covcol"))+geom_point(alpha=.4, show.legend=F)+theme_bw()+
122-
ggtitle("Correlation: ")+
123-
guides(col=guide_legend(title="Coverage range"))+
124-
facet_wrap(~covcol, nrow=2, ncol=2))
110+
pdf(plotname, useDingbats=F)
111+
vs.scat=ggplot(enough.meth, aes_string(x=colnames(enough.meth)[1], y=colnames(enough.meth)[2]))+geom_point(alpha=.4, show.legend=F)+theme_bw()+
112+
ggtitle(paste0("Correlation: ", cor.title))
113+
print(ggMarginal(vs.scat, type="histogram", margins="both"))
114+
115+
print(ggplot(enough.meth, aes_string(x=colnames(enough.meth)[1], y="mdiff"))+geom_point(alpha=.4, show.legend=F)+geom_smooth(method="loess")+theme_bw()+
116+
ggtitle(paste0("Correlation: ", cor.title)))
117+
125118
dev.off()
126-
119+
127120
}
128121

129122
filt.cgs <- function(methobj, cthresh=5) {
@@ -141,7 +134,7 @@ plot.meth.reg <- function(methobj, this.reg, plotcov=F, plotdiff=F, ns=10, h=500
141134
idx=overlapsAny(granges(methobj), this.reg)
142135

143136
this.obj=methobj[idx,]
144-
137+
145138
if (dim(this.obj)[1]>0) {
146139
this.obj=BSmooth(this.obj, ns=ns, h=h, verbose=F)
147140

@@ -150,15 +143,21 @@ plot.meth.reg <- function(methobj, this.reg, plotcov=F, plotdiff=F, ns=10, h=500
150143
meth=data.frame(coord=start(coord), getMeth(this.obj, type="raw", what="perBase"))
151144
sm.meth=data.frame(coord=start(coord), getMeth(this.obj, type="smooth", what="perBase"))
152145

153-
mel.meth=melt(meth, value.name="meth", variable.name="sample", id.vars="coord")
154-
mel.smooth=melt(sm.meth, value.name="meth", variable.name="sample", id.vars="coord")
146+
mel.meth=melt(meth, value.name="meth", variable.name="sid", id.vars="coord")
147+
mel.smooth=melt(sm.meth, value.name="meth", variable.name="sid", id.vars="coord")
155148

149+
##Get samples and analysis type split out
150+
mel.meth$sample=sapply(strsplit(as.character(mel.meth$sid), split="\\."), head, 1)
151+
mel.meth$tech=sapply(strsplit(as.character(mel.meth$sid), split="\\."), tail, 1)
152+
153+
mel.smooth$sample=sapply(strsplit(as.character(mel.smooth$sid), split="\\."), head, 1)
154+
mel.smooth$tech=sapply(strsplit(as.character(mel.smooth$sid), split="\\."), tail, 1)
156155

157156
mel.meth=mel.meth[!is.na(mel.meth$meth),]
158157
mel.smooth=mel.smooth[!is.na(mel.smooth$meth),]
159158

160-
print(ggplot(mel.meth, aes(x=coord, y=meth, color=sample))+geom_point(alpha=.4)+
161-
geom_line(data=mel.smooth)+
159+
print(ggplot(mel.meth, aes(x=coord, y=meth, color=sample, linetype=tech, shape=tech))+geom_point(alpha=.4)+
160+
geom_line(data=mel.smooth)+facet_wrap(~tech, ncol=1)+
162161
theme_bw()+labs(title=paste0("Methylation ", i, ": ", as.character(seqnames(this.reg[1]))))+
163162
xlab("Coordinate")+ylab("Percent Methylated")+
164163
guides(col=guide_legend(label.position="bottom", nrow=2))+theme(legend.position="bottom"))
@@ -187,15 +186,20 @@ plot.meth.reg <- function(methobj, this.reg, plotcov=F, plotdiff=F, ns=10, h=500
187186
if (plotcov) {
188187
##Get coverage
189188
cov=data.frame(coord=start(coord), getCoverage(this.obj, type="Cov", what="perBase"))
190-
mel.cov=melt(cov, value.name="cov", variable.name="sample", id.vars="coord")
189+
mel.cov=melt(cov, value.name="cov", variable.name="sid", id.vars="coord")
190+
191+
mel.cov$sample=sapply(strsplit(as.character(mel.cov$sid), split="\\."), head, 1)
192+
mel.cov$tech=sapply(strsplit(as.character(mel.cov$sid), split="\\."), tail, 1)
193+
194+
191195
##Get avg coverage for regions
192196
colMeans(cov[,2:dim(cov)[2]])
193197
covrep=paste(format(colMeans(cov[,2:dim(cov)[2]]), digits=2), collapse=";")
194198

195-
print(ggplot(mel.cov, aes(x=coord, y=cov, color=sample))+geom_point(alpha=.4)+
199+
print(ggplot(mel.cov, aes(x=coord, y=cov, color=sample, linetype=tech, shape=tech))+geom_point(alpha=.4)+
196200
theme_bw()+labs(title=paste0("Coverage ", i, ": ", covrep))+
197201
guides(col=guide_legend(label.position="bottom", nrow=2))+theme(legend.position="bottom")+
198-
facet_wrap(~sample, ncol=1,scale="free_y"))
202+
facet_wrap(~tech, ncol=1,scale="free_y"))
199203
}
200204

201205

@@ -205,7 +209,7 @@ plot.meth.reg <- function(methobj, this.reg, plotcov=F, plotdiff=F, ns=10, h=500
205209

206210

207211
plot.strand <- function(strand.loc, this.reg, called.only=T) {
208-
### Plot strand data
212+
### Plot strand data; called.only shows the ones with a confident call only
209213

210214
if (!called.only) {
211215
print(ggplot(strand.loc, aes(x=start, y=uid, color=factor(meth), group=uid))+geom_point(alpha=.5)+theme_bw()+
@@ -223,6 +227,26 @@ plot.strand <- function(strand.loc, this.reg, called.only=T) {
223227

224228
}
225229

230+
strand.loglik.hist <- function(strand.loc, highlight) {
231+
##Plot distribution of loglik ratio per strand, highlighting specific strands
232+
233+
strand.loc$col="blue"
234+
strand.loc$col[strand.loc$readix %in% highlight]="red"
235+
236+
237+
##Remove uncalled CGs
238+
strand.loc=strand.loc[strand.loc$meth!=0,]
239+
240+
print(ggplot(strand.loc, aes(x=loglikratio, color=col, group=readix))+theme_bw()+
241+
facet_wrap(~samp, ncol=1)+
242+
geom_density())
243+
244+
print(ggplot(strand.loc, aes(x=loglikratio, color=col, group=readix))+theme_bw()+
245+
facet_wrap(~samp, ncol=1)+
246+
geom_freqpoly())
247+
248+
}
249+
226250

227251

228252
args=commandArgs(TRUE)
@@ -234,7 +258,7 @@ if(! interactive()) {
234258
if(command == "correlation") {
235259
cytoreports=args[c(-1, -length(args))]
236260
methobj=load_bsseq(cytoreports)
237-
corr.plot(methobj, cthresh=5, plotname=args[length(args)])
261+
corr.plot(methobj, cthresh=10, plotname=args[length(args)])
238262
} else if(command == "best_regions") {
239263

240264
reg.gr=load_bed(args[2])
@@ -252,7 +276,7 @@ if(! interactive()) {
252276
methobj=load_bsseq(cytoreports)
253277

254278

255-
pdf(args[length(args)], height=8.5, width=11)
279+
pdf(args[length(args)], height=8.5, width=11, useDingbats=F)
256280

257281
for (i in 1:length(reg.gr)) {
258282
plot.meth.reg(methobj, reg.gr[i], h=750, ns=40, plotcov=T)
@@ -267,7 +291,7 @@ if(! interactive()) {
267291
cytoreports=args[c(-1, -2, -length(args))]
268292
methobj=load_bsseq(cytoreports)
269293

270-
pdf(args[length(args)], height=8.5, width=11)
294+
pdf(args[length(args)], height=8.5, width=11, useDingbats=F)
271295

272296
for (i in 1:length(reg.gr)) {
273297
plot.meth.reg(methobj, reg.gr[i], h=750, ns=40, plotcov=T, plotdiff=T)
@@ -285,7 +309,7 @@ if(! interactive()) {
285309

286310
meth.strand=load_strand(pfiles)
287311

288-
pdf(args[length(args)], height=8.5, width=11)
312+
pdf(args[length(args)], height=8.5, width=11, useDingbats=F)
289313

290314
for (i in 1:length(reg.gr)) {
291315
##Filter region

0 commit comments

Comments
 (0)