@@ -79,6 +79,11 @@ def trim(segment, primer_pos, end, verbose=False):
79
79
vernose : bool
80
80
If True, will print soft masking info during trimming
81
81
"""
82
+ if verbose :
83
+ print (
84
+ f"{ segment .query_name } : Trimming { 'end' if end else 'start' } of read to primer position { primer_pos } " ,
85
+ file = sys .stderr ,
86
+ )
82
87
# get a copy of the cigar tuples to work with
83
88
cigar = copy (segment .cigartuples )
84
89
@@ -99,11 +104,14 @@ def trim(segment, primer_pos, end, verbose=False):
99
104
else :
100
105
flag , length = cigar .pop (0 )
101
106
if verbose :
102
- print ("Chomped a %s, %s" % (flag , length ), file = sys .stderr )
107
+ print (
108
+ f"{ segment .query_name } : Chomped a { flag } , { length } " ,
109
+ file = sys .stderr ,
110
+ )
103
111
except IndexError :
104
112
if verbose :
105
113
print (
106
- " Ran out of cigar during soft masking - completely masked read will be ignored" ,
114
+ f" { segment . query_name } : Ran out of cigar during soft masking - completely masked read will be ignored" ,
107
115
file = sys .stderr ,
108
116
)
109
117
break
@@ -128,10 +136,13 @@ def trim(segment, primer_pos, end, verbose=False):
128
136
# calculate how many extra matches are needed in the CIGAR
129
137
extra = abs (pos - primer_pos )
130
138
if verbose :
131
- print (" extra %s" % ( extra ) , file = sys .stderr )
139
+ print (f" { segment . query_name } : extra { extra } " , file = sys .stderr )
132
140
if extra :
133
141
if verbose :
134
- print ("Inserted a %s, %s" % (0 , extra ), file = sys .stderr )
142
+ print (
143
+ f"{ segment .query_name } : Inserted a 0, { extra } " ,
144
+ file = sys .stderr ,
145
+ )
135
146
if end :
136
147
cigar .append ((0 , extra ))
137
148
else :
@@ -144,13 +155,16 @@ def trim(segment, primer_pos, end, verbose=False):
144
155
# update the position of the leftmost mappinng base
145
156
segment .pos = pos - extra
146
157
if verbose :
147
- print ("New pos: %s" % (segment .pos ), file = sys .stderr )
158
+ print (
159
+ f"{ segment .query_name } : New pos - { segment .pos } " ,
160
+ file = sys .stderr ,
161
+ )
148
162
149
163
# if proposed softmask leads straight into a deletion, shuffle leftmost mapping base along and ignore the deletion
150
164
if cigar [0 ][0 ] == 2 :
151
165
if verbose :
152
166
print (
153
- " softmask created a leading deletion in the CIGAR, shuffling the alignment" ,
167
+ f" { segment . query_name } : softmask created a leading deletion in the CIGAR, shuffling the alignment" ,
154
168
file = sys .stderr ,
155
169
)
156
170
while 1 :
@@ -168,7 +182,13 @@ def trim(segment, primer_pos, end, verbose=False):
168
182
169
183
# check the new CIGAR and replace the old one
170
184
if cigar [0 ][1 ] <= 0 or cigar [- 1 ][1 ] <= 0 :
171
- raise ("invalid cigar operation created - possibly due to INDEL in primer" )
185
+ if verbose :
186
+ print (
187
+ f"{ segment .query_name } : invalid cigar operation created - possibly due to INDEL in primer" ,
188
+ file = sys .stderr ,
189
+ )
190
+ return
191
+
172
192
segment .cigartuples = cigar
173
193
return
174
194
@@ -195,16 +215,22 @@ def handle_segment(
195
215
# filter out unmapped and supplementary alignment segments
196
216
if segment .is_unmapped :
197
217
if args .verbose :
198
- print ("%s skipped as unmapped" % (segment .query_name ), file = sys .stderr )
218
+ print (
219
+ f"{ segment .query_name } : { segment .query_name } skipped as unmapped" ,
220
+ file = sys .stderr ,
221
+ )
199
222
return False
200
223
if segment .is_supplementary :
201
224
if args .verbose :
202
- print ("%s skipped as supplementary" % (segment .query_name ), file = sys .stderr )
225
+ print (
226
+ f"{ segment .query_name } : { segment .query_name } skipped as supplementary" ,
227
+ file = sys .stderr ,
228
+ )
203
229
return False
204
230
if segment .mapping_quality < min_mapq :
205
231
if args .verbose :
206
232
print (
207
- "%s skipped as mapping quality below threshold" % ( segment . query_name ) ,
233
+ f" { segment . query_name } : skipped as mapping quality below threshold" ,
208
234
file = sys .stderr ,
209
235
)
210
236
return False
@@ -230,7 +256,7 @@ def handle_segment(
230
256
if not p1 or not p2 :
231
257
if args .verbose :
232
258
print (
233
- "%s skipped as no primer found for segment" % ( segment . query_name ) ,
259
+ f" { segment . query_name } : skipped as no primer found for segment" ,
234
260
file = sys .stderr ,
235
261
)
236
262
return False
@@ -260,9 +286,9 @@ def handle_segment(
260
286
"ReferenceEnd" : segment .reference_end ,
261
287
"PrimerPair" : f"{ p1 [2 ]['Primer_ID' ]} _{ p2 [2 ]['Primer_ID' ]} " ,
262
288
"Primer1" : p1 [2 ]["Primer_ID" ],
263
- "Primer1Start" : abs ( p1 [1 ]) ,
289
+ "Primer1Start" : p1 [2 ][ "start" ] ,
264
290
"Primer2" : p2 [2 ]["Primer_ID" ],
265
- "Primer2Start" : abs ( p2 [1 ]) ,
291
+ "Primer2Start" : p2 [2 ][ "start" ] ,
266
292
"IsSecondary" : segment .is_secondary ,
267
293
"IsSupplementary" : segment .is_supplementary ,
268
294
"Start" : p1 [2 ]["start" ],
@@ -275,16 +301,11 @@ def handle_segment(
275
301
if args .remove_incorrect_pairs and not correctly_paired :
276
302
if args .verbose :
277
303
print (
278
- "%s skipped as not correctly paired" % ( segment . query_name ) ,
304
+ f" { segment . query_name } : skipped as not correctly paired" ,
279
305
file = sys .stderr ,
280
306
)
281
307
return False
282
308
283
- if args .verbose :
284
- # Dont screw with the order of the dict
285
- report_str = "\t " .join (str (x ) for x in report .values ())
286
- print (report_str , file = sys .stderr )
287
-
288
309
# get the primer positions
289
310
if args .trim_primers :
290
311
p1_position = p1 [2 ]["end" ]
@@ -299,15 +320,12 @@ def handle_segment(
299
320
trim (segment , p1_position , False , args .verbose )
300
321
if args .verbose :
301
322
print (
302
- "ref start %s >= primer_position %s"
303
- % (segment .reference_start , p1_position ),
323
+ f"{ segment .query_name } : ref start { segment .reference_start } >= primer_position { p1_position } " ,
304
324
file = sys .stderr ,
305
325
)
306
326
except Exception as e :
307
327
print (
308
- "problem soft masking left primer in {} (error: {}), skipping" .format (
309
- segment .query_name , e
310
- ),
328
+ f"{ segment .query_name } : problem soft masking left primer (error: { e } ), skipping" ,
311
329
file = sys .stderr ,
312
330
)
313
331
return False
@@ -318,15 +336,12 @@ def handle_segment(
318
336
trim (segment , p2_position , True , args .verbose )
319
337
if args .verbose :
320
338
print (
321
- "ref start %s >= primer_position %s"
322
- % (segment .reference_start , p2_position ),
339
+ f"{ segment .query_name } : ref start { segment .reference_start } >= primer_position { p2_position } " ,
323
340
file = sys .stderr ,
324
341
)
325
342
except Exception as e :
326
343
print (
327
- "problem soft masking right primer in {} (error: {}), skipping" .format (
328
- segment .query_name , e
329
- ),
344
+ f"{ segment .query_name } : problem soft masking right primer (error: { e } ), skipping" ,
330
345
file = sys .stderr ,
331
346
)
332
347
return False
@@ -335,8 +350,7 @@ def handle_segment(
335
350
if "M" not in segment .cigarstring :
336
351
if args .verbose :
337
352
print (
338
- "%s dropped as does not match reference post masking"
339
- % (segment .query_name ),
353
+ f"{ segment .query_name } : dropped as does not match reference post masking" ,
340
354
file = sys .stderr ,
341
355
)
342
356
return False
@@ -366,9 +380,10 @@ def handle_paired_segment(
366
380
segment1 , segment2 = segments
367
381
368
382
if not segment1 or not segment2 :
383
+ segment = segment1 if segment1 else segment2
369
384
if args .verbose :
370
385
print (
371
- "Segment pair skipped as at least one segment in pair does not exist" ,
386
+ f" { segment . query_name } : Pair skipped as at least one segment in pair does not exist" ,
372
387
file = sys .stderr ,
373
388
)
374
389
return False
@@ -377,24 +392,23 @@ def handle_paired_segment(
377
392
if segment1 .is_unmapped or segment2 .is_unmapped :
378
393
if args .verbose :
379
394
print (
380
- " Segment pair: %s skipped as unmapped" % ( segment1 . query_name ) ,
395
+ f" { segment1 . query_name } : Segment pair skipped as one of pair is unmapped" ,
381
396
file = sys .stderr ,
382
397
)
383
398
return False
384
399
385
400
if segment1 .is_supplementary or segment2 .is_supplementary :
386
401
if args .verbose :
387
402
print (
388
- "Segment pair: %s skipped as supplementary" % ( segment1 . query_name ) ,
403
+ f" { segment1 . query_name } : Pair skipped as at least one of pair is supplementary" ,
389
404
file = sys .stderr ,
390
405
)
391
406
return False
392
407
393
408
if segment1 .mapping_quality < min_mapq or segment2 .mapping_quality < min_mapq :
394
409
if args .verbose :
395
410
print (
396
- "Segment pair: %s skipped as mapping quality below threshold"
397
- % (segment1 .query_name ),
411
+ f"{ segment1 .query_name } : Pair skipped as at least one mapping quality of pair is below threshold" ,
398
412
file = sys .stderr ,
399
413
)
400
414
return False
@@ -418,8 +432,7 @@ def handle_paired_segment(
418
432
if not p1 or not p2 :
419
433
if args .verbose :
420
434
print (
421
- "Paired segment: %s skipped as no primer found for segment"
422
- % (segment1 .query_name ),
435
+ f"{ segment1 .query_name } : Pair skipped as no primer found for at least one read in pair" ,
423
436
file = sys .stderr ,
424
437
)
425
438
return False
@@ -451,9 +464,9 @@ def handle_paired_segment(
451
464
"ReferenceEnd" : segment2 .reference_end ,
452
465
"PrimerPair" : f"{ p1 [2 ]['Primer_ID' ]} _{ p2 [2 ]['Primer_ID' ]} " ,
453
466
"Primer1" : p1 [2 ]["Primer_ID" ],
454
- "Primer1Start" : abs ( p1 [1 ]) ,
467
+ "Primer1Start" : p1 [2 ][ "start" ] ,
455
468
"Primer2" : p2 [2 ]["Primer_ID" ],
456
- "Primer2Start" : abs ( p2 [1 ]) ,
469
+ "Primer2Start" : p2 [2 ][ "start" ] ,
457
470
"IsSecondary" : segment1 .is_secondary ,
458
471
"IsSupplementary" : segment1 .is_supplementary ,
459
472
"Start" : p1 [2 ]["start" ],
@@ -466,17 +479,11 @@ def handle_paired_segment(
466
479
if args .remove_incorrect_pairs and not correctly_paired :
467
480
if args .verbose :
468
481
print (
469
- "Paired segment: %s skipped as not correctly paired"
470
- % (segment1 .query_name ),
482
+ f"{ segment1 .query_name } : Pair skipped due to primers not being correctly paired between reads of pair" ,
471
483
file = sys .stderr ,
472
484
)
473
485
return False
474
486
475
- if args .verbose :
476
- # Dont screw with the order of the dict
477
- report_str = "\t " .join (str (x ) for x in report .values ())
478
- print (report_str , file = sys .stderr )
479
-
480
487
# get the primer positions
481
488
if args .trim_primers :
482
489
p1_position = p1 [2 ]["end" ]
@@ -491,15 +498,27 @@ def handle_paired_segment(
491
498
trim (segment1 , p1_position , False , args .verbose )
492
499
if args .verbose :
493
500
print (
494
- "ref start %s >= primer_position %s"
495
- % (segment1 .reference_start , p1_position ),
501
+ f"{ segment1 .query_name } : ref start { segment1 .reference_start } >= primer_position { p1_position } " ,
496
502
file = sys .stderr ,
497
503
)
498
504
except Exception as e :
499
505
print (
500
- "problem soft masking left primer in {} (error: {}), skipping" .format (
501
- segment1 .query_name , e
502
- ),
506
+ f"{ segment1 .query_name } : Problem soft masking left primer (error: { e } ), skipping" ,
507
+ file = sys .stderr ,
508
+ )
509
+ return False
510
+
511
+ elif segment1 .reference_end > p2_position :
512
+ try :
513
+ trim (segment1 , p2_position , True , args .verbose )
514
+ if args .verbose :
515
+ print (
516
+ f"{ segment1 .query_name } : ref_end { segment1 .reference_end } >= primer_position { p2_position } " ,
517
+ file = sys .stderr ,
518
+ )
519
+ except Exception as e :
520
+ print (
521
+ f"{ segment1 .query_name } : Problem soft masking right primer (error: { e } ), skipping" ,
503
522
file = sys .stderr ,
504
523
)
505
524
return False
@@ -510,15 +529,26 @@ def handle_paired_segment(
510
529
trim (segment2 , p2_position , True , args .verbose )
511
530
if args .verbose :
512
531
print (
513
- "ref start %s >= primer_position %s"
514
- % (segment2 .reference_start , p2_position ),
532
+ f"{ segment1 .query_name } : ref_start { segment2 .reference_start } >= primer_position { p2_position } " ,
515
533
file = sys .stderr ,
516
534
)
517
535
except Exception as e :
518
536
print (
519
- "problem soft masking right primer in {} (error: {}), skipping" .format (
520
- segment1 .query_name , e
521
- ),
537
+ f"{ segment1 .query_name } : Problem soft masking right primer (error: { e } ), skipping" ,
538
+ file = sys .stderr ,
539
+ )
540
+ return False
541
+ elif segment2 .reference_start < p1_position :
542
+ try :
543
+ trim (segment2 , p1_position , False , args .verbose )
544
+ if args .verbose :
545
+ print (
546
+ f"{ segment1 .query_name } : ref_end { segment2 .reference_end } >= primer_position { p1_position } " ,
547
+ file = sys .stderr ,
548
+ )
549
+ except Exception as e :
550
+ print (
551
+ f"{ segment1 .query_name } : Problem soft masking left primer (error: { e } ), skipping" ,
522
552
file = sys .stderr ,
523
553
)
524
554
return False
@@ -527,8 +557,7 @@ def handle_paired_segment(
527
557
if "M" not in segment1 .cigarstring or "M" not in segment2 .cigarstring :
528
558
if args .verbose :
529
559
print (
530
- "Paired segment: %s dropped as does not match reference post masking"
531
- % (segment1 .query_name ),
560
+ f"{ segment1 .query_name } : Paired segment dropped as does not match reference post masking" ,
532
561
file = sys .stderr ,
533
562
)
534
563
return False
@@ -613,7 +642,7 @@ def normalise(trimmed_segments: dict, normalise: int, bed: list, verbose: bool =
613
642
for chrom , amplicon_dict in trimmed_segments .items ():
614
643
for amplicon , segments in amplicon_dict .items ():
615
644
if amplicon not in amplicons [chrom ]:
616
- raise ValueError (f"Segment { amplicon } not found in primer scheme file" )
645
+ raise ValueError (f"Amplicon { amplicon } not found in primer scheme file" )
617
646
618
647
desired_depth = np .full_like (
619
648
(amplicons [chrom ][amplicon ]["length" ],), normalise , dtype = int
@@ -952,8 +981,8 @@ def main():
952
981
parser .add_argument (
953
982
"--no-read-groups" ,
954
983
dest = "no_read_groups" ,
955
- action = "store_true" ,
956
984
help = "Do not divide reads into groups in SAM output" ,
985
+ action = "store_true" ,
957
986
)
958
987
parser .add_argument ("--verbose" , action = "store_true" , help = "Debug mode" )
959
988
parser .add_argument ("--remove-incorrect-pairs" , action = "store_true" )
0 commit comments