@@ -19,13 +19,15 @@ load_bsseq <- function(cytoreports) {
1919 samps = gsub(" cyto.txt.*" , " bisulfite" , samps )
2020
2121 samps = toupper(samps )
22+
23+ methobj = read.bismark(files = cytoreports , sampleNames = samps , rmZeroCov = T , fileType = " cytosineReport" )
2224
23- methobj = foreach(i = 1 : length(cytoreports ), .combine = combine ) %do % {
24- single = read.bismark(files = cytoreports [i ], sampleNames = samps [i ], rmZeroCov = T , fileType = " cytosineReport" )
25- # #Bug workaround
26- sampleNames(single )= samps [i ]
27- single
28- }
25+ # methobj=foreach(i=1:length(cytoreports), .combine=combine) %do% {
26+ # single=read.bismark(files=cytoreports[i], sampleNames=samps[i], rmZeroCov=T, fileType="cytosineReport")
27+ # ##Bug workaround
28+ # sampleNames(single)=samps[i]
29+ # single
30+ # }
2931
3032}
3133
@@ -79,9 +81,9 @@ region.sort.filter <- function(methobj, reg, cthresh=10, diff.thresh=.2, regout=
7981 write.table(reg.bedout , file = gzfile(regout ), quote = F , sep = " \t " , row.names = F , col.names = F )
8082}
8183
82-
83- corr.plot <- function ( methobj , cthresh = 5 , plotname = " corr.pdf " ) {
84- # #Add colors for level of coverage
84+ corr.plot <- function ( methobj , cthresh = 10 , plotname = " corr.pdf " ) {
85+ library( ggExtra )
86+
8587
8688 # #Assuming two samples only in bsseq
8789 methobj = methobj [,1 : 2 ]
@@ -97,33 +99,24 @@ corr.plot <- function(methobj, cthresh=5, plotname="corr.pdf") {
9799 # #Get coverage of lesser (probably nanopore) data
98100 enough.meth $ mincov = apply(basecov [enough.cov ,], 1 , min )
99101
100- # #Bin to threshold, threshold+5, threshold +10 (5, 10, 15)
101- plus0 = enough.meth $ mincov > = (cthresh )& enough.meth $ mincov < (cthresh + 5 )
102- plus5 = enough.meth $ mincov > = (cthresh + 5 )& enough.meth $ mincov < (cthresh + 10 )
103- plus10 = enough.meth $ mincov > = (cthresh + 10 )
104-
105102 # #Get correlation value
106- corval = c(cor(enough.meth [plus0 ,1 ], enough.meth [plus0 ,2 ]),
107- cor(enough.meth [plus5 ,1 ], enough.meth [plus5 ,2 ]),
108- cor(enough.meth [plus10 ,1 ], enough.meth [plus10 ,2 ]))
103+ corval = cor(enough.meth [,1 ], enough.meth [,2 ])
109104
110- cov.levels = c(paste0(cthresh , " -" , cthresh + 5 , " : R=" ,format(corval [1 ], digits = 2 )),
111- paste0(cthresh + 5 , " -" , cthresh + 10 , " : R=" ,format(corval [2 ], digits = 2 )),
112- paste0(" >" , cthresh + 10 , " X" , " : R=" , format(corval [3 ], digits = 2 )))
113-
114- # #Plot with different colors and titles for coverage
115- enough.meth $ covcol = cov.levels [1 ]
116- enough.meth $ covcol [plus5 ]= cov.levels [2 ]
117- enough.meth $ covcol [plus10 ]= cov.levels [3 ]
118- enough.meth $ covcol = factor (enough.meth $ covcol , levels = cov.levels )
105+ cor.title = paste0(" >" , cthresh , " X: R=" ,format(corval , digits = 2 ))
106+
107+ # #Difference
108+ enough.meth $ mdiff = enough.meth [,1 ]- enough.meth [,2 ]
119109
120- pdf(plotname )
121- print(ggplot(enough.meth , aes_string(x = colnames(enough.meth )[1 ], y = colnames(enough.meth )[2 ], color = " covcol" ))+ geom_point(alpha = .4 , show.legend = F )+ theme_bw()+
122- ggtitle(" Correlation: " )+
123- guides(col = guide_legend(title = " Coverage range" ))+
124- facet_wrap(~ covcol , nrow = 2 , ncol = 2 ))
110+ pdf(plotname , useDingbats = F )
111+ vs.scat = ggplot(enough.meth , aes_string(x = colnames(enough.meth )[1 ], y = colnames(enough.meth )[2 ]))+ geom_point(alpha = .4 , show.legend = F )+ theme_bw()+
112+ ggtitle(paste0(" Correlation: " , cor.title ))
113+ print(ggMarginal(vs.scat , type = " histogram" , margins = " both" ))
114+
115+ print(ggplot(enough.meth , aes_string(x = colnames(enough.meth )[1 ], y = " mdiff" ))+ geom_point(alpha = .4 , show.legend = F )+ geom_smooth(method = " loess" )+ theme_bw()+
116+ ggtitle(paste0(" Correlation: " , cor.title )))
117+
125118 dev.off()
126-
119+
127120}
128121
129122filt.cgs <- function (methobj , cthresh = 5 ) {
@@ -141,7 +134,7 @@ plot.meth.reg <- function(methobj, this.reg, plotcov=F, plotdiff=F, ns=10, h=500
141134 idx = overlapsAny(granges(methobj ), this.reg )
142135
143136 this.obj = methobj [idx ,]
144-
137+
145138 if (dim(this.obj )[1 ]> 0 ) {
146139 this.obj = BSmooth(this.obj , ns = ns , h = h , verbose = F )
147140
@@ -150,15 +143,21 @@ plot.meth.reg <- function(methobj, this.reg, plotcov=F, plotdiff=F, ns=10, h=500
150143 meth = data.frame (coord = start(coord ), getMeth(this.obj , type = " raw" , what = " perBase" ))
151144 sm.meth = data.frame (coord = start(coord ), getMeth(this.obj , type = " smooth" , what = " perBase" ))
152145
153- mel.meth = melt(meth , value.name = " meth" , variable.name = " sample " , id.vars = " coord" )
154- mel.smooth = melt(sm.meth , value.name = " meth" , variable.name = " sample " , id.vars = " coord" )
146+ mel.meth = melt(meth , value.name = " meth" , variable.name = " sid " , id.vars = " coord" )
147+ mel.smooth = melt(sm.meth , value.name = " meth" , variable.name = " sid " , id.vars = " coord" )
155148
149+ # #Get samples and analysis type split out
150+ mel.meth $ sample = sapply(strsplit(as.character(mel.meth $ sid ), split = " \\ ." ), head , 1 )
151+ mel.meth $ tech = sapply(strsplit(as.character(mel.meth $ sid ), split = " \\ ." ), tail , 1 )
152+
153+ mel.smooth $ sample = sapply(strsplit(as.character(mel.smooth $ sid ), split = " \\ ." ), head , 1 )
154+ mel.smooth $ tech = sapply(strsplit(as.character(mel.smooth $ sid ), split = " \\ ." ), tail , 1 )
156155
157156 mel.meth = mel.meth [! is.na(mel.meth $ meth ),]
158157 mel.smooth = mel.smooth [! is.na(mel.smooth $ meth ),]
159158
160- print(ggplot(mel.meth , aes(x = coord , y = meth , color = sample ))+ geom_point(alpha = .4 )+
161- geom_line(data = mel.smooth )+
159+ print(ggplot(mel.meth , aes(x = coord , y = meth , color = sample , linetype = tech , shape = tech ))+ geom_point(alpha = .4 )+
160+ geom_line(data = mel.smooth )+ facet_wrap( ~ tech , ncol = 1 ) +
162161 theme_bw()+ labs(title = paste0(" Methylation " , i , " : " , as.character(seqnames(this.reg [1 ]))))+
163162 xlab(" Coordinate" )+ ylab(" Percent Methylated" )+
164163 guides(col = guide_legend(label.position = " bottom" , nrow = 2 ))+ theme(legend.position = " bottom" ))
@@ -187,15 +186,20 @@ plot.meth.reg <- function(methobj, this.reg, plotcov=F, plotdiff=F, ns=10, h=500
187186 if (plotcov ) {
188187 # #Get coverage
189188 cov = data.frame (coord = start(coord ), getCoverage(this.obj , type = " Cov" , what = " perBase" ))
190- mel.cov = melt(cov , value.name = " cov" , variable.name = " sample" , id.vars = " coord" )
189+ mel.cov = melt(cov , value.name = " cov" , variable.name = " sid" , id.vars = " coord" )
190+
191+ mel.cov $ sample = sapply(strsplit(as.character(mel.cov $ sid ), split = " \\ ." ), head , 1 )
192+ mel.cov $ tech = sapply(strsplit(as.character(mel.cov $ sid ), split = " \\ ." ), tail , 1 )
193+
194+
191195 # #Get avg coverage for regions
192196 colMeans(cov [,2 : dim(cov )[2 ]])
193197 covrep = paste(format(colMeans(cov [,2 : dim(cov )[2 ]]), digits = 2 ), collapse = " ;" )
194198
195- print(ggplot(mel.cov , aes(x = coord , y = cov , color = sample ))+ geom_point(alpha = .4 )+
199+ print(ggplot(mel.cov , aes(x = coord , y = cov , color = sample , linetype = tech , shape = tech ))+ geom_point(alpha = .4 )+
196200 theme_bw()+ labs(title = paste0(" Coverage " , i , " : " , covrep ))+
197201 guides(col = guide_legend(label.position = " bottom" , nrow = 2 ))+ theme(legend.position = " bottom" )+
198- facet_wrap(~ sample , ncol = 1 ,scale = " free_y" ))
202+ facet_wrap(~ tech , ncol = 1 ,scale = " free_y" ))
199203 }
200204
201205
@@ -205,7 +209,7 @@ plot.meth.reg <- function(methobj, this.reg, plotcov=F, plotdiff=F, ns=10, h=500
205209
206210
207211plot.strand <- function (strand.loc , this.reg , called.only = T ) {
208- # ## Plot strand data
212+ # ## Plot strand data; called.only shows the ones with a confident call only
209213
210214 if (! called.only ) {
211215 print(ggplot(strand.loc , aes(x = start , y = uid , color = factor (meth ), group = uid ))+ geom_point(alpha = .5 )+ theme_bw()+
@@ -223,6 +227,26 @@ plot.strand <- function(strand.loc, this.reg, called.only=T) {
223227
224228}
225229
230+ strand.loglik.hist <- function (strand.loc , highlight ) {
231+ # #Plot distribution of loglik ratio per strand, highlighting specific strands
232+
233+ strand.loc $ col = " blue"
234+ strand.loc $ col [strand.loc $ readix %in% highlight ]= " red"
235+
236+
237+ # #Remove uncalled CGs
238+ strand.loc = strand.loc [strand.loc $ meth != 0 ,]
239+
240+ print(ggplot(strand.loc , aes(x = loglikratio , color = col , group = readix ))+ theme_bw()+
241+ facet_wrap(~ samp , ncol = 1 )+
242+ geom_density())
243+
244+ print(ggplot(strand.loc , aes(x = loglikratio , color = col , group = readix ))+ theme_bw()+
245+ facet_wrap(~ samp , ncol = 1 )+
246+ geom_freqpoly())
247+
248+ }
249+
226250
227251
228252args = commandArgs(TRUE )
@@ -234,7 +258,7 @@ if(! interactive()) {
234258 if (command == " correlation" ) {
235259 cytoreports = args [c(- 1 , - length(args ))]
236260 methobj = load_bsseq(cytoreports )
237- corr.plot(methobj , cthresh = 5 , plotname = args [length(args )])
261+ corr.plot(methobj , cthresh = 10 , plotname = args [length(args )])
238262 } else if (command == " best_regions" ) {
239263
240264 reg.gr = load_bed(args [2 ])
@@ -252,7 +276,7 @@ if(! interactive()) {
252276 methobj = load_bsseq(cytoreports )
253277
254278
255- pdf(args [length(args )], height = 8.5 , width = 11 )
279+ pdf(args [length(args )], height = 8.5 , width = 11 , useDingbats = F )
256280
257281 for (i in 1 : length(reg.gr )) {
258282 plot.meth.reg(methobj , reg.gr [i ], h = 750 , ns = 40 , plotcov = T )
@@ -267,7 +291,7 @@ if(! interactive()) {
267291 cytoreports = args [c(- 1 , - 2 , - length(args ))]
268292 methobj = load_bsseq(cytoreports )
269293
270- pdf(args [length(args )], height = 8.5 , width = 11 )
294+ pdf(args [length(args )], height = 8.5 , width = 11 , useDingbats = F )
271295
272296 for (i in 1 : length(reg.gr )) {
273297 plot.meth.reg(methobj , reg.gr [i ], h = 750 , ns = 40 , plotcov = T , plotdiff = T )
@@ -285,7 +309,7 @@ if(! interactive()) {
285309
286310 meth.strand = load_strand(pfiles )
287311
288- pdf(args [length(args )], height = 8.5 , width = 11 )
312+ pdf(args [length(args )], height = 8.5 , width = 11 , useDingbats = F )
289313
290314 for (i in 1 : length(reg.gr )) {
291315 # #Filter region
0 commit comments