Skip to content

Commit cf9992e

Browse files
authored
1.7.4 PR (#170)
* 1.7.4 push * Unify verbose progress messages, always include read_id * Fix segment nonetype bug * Fix leftover %
1 parent f4afa53 commit cf9992e

File tree

2 files changed

+91
-62
lines changed

2 files changed

+91
-62
lines changed

artic/align_trim.py

Lines changed: 90 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,11 @@ def trim(segment, primer_pos, end, verbose=False):
7979
vernose : bool
8080
If True, will print soft masking info during trimming
8181
"""
82+
if verbose:
83+
print(
84+
f"{segment.query_name}: Trimming {'end' if end else 'start'} of read to primer position {primer_pos}",
85+
file=sys.stderr,
86+
)
8287
# get a copy of the cigar tuples to work with
8388
cigar = copy(segment.cigartuples)
8489

@@ -99,11 +104,14 @@ def trim(segment, primer_pos, end, verbose=False):
99104
else:
100105
flag, length = cigar.pop(0)
101106
if verbose:
102-
print("Chomped a %s, %s" % (flag, length), file=sys.stderr)
107+
print(
108+
f"{segment.query_name}: Chomped a {flag}, {length}",
109+
file=sys.stderr,
110+
)
103111
except IndexError:
104112
if verbose:
105113
print(
106-
"Ran out of cigar during soft masking - completely masked read will be ignored",
114+
f"{segment.query_name}: Ran out of cigar during soft masking - completely masked read will be ignored",
107115
file=sys.stderr,
108116
)
109117
break
@@ -128,10 +136,13 @@ def trim(segment, primer_pos, end, verbose=False):
128136
# calculate how many extra matches are needed in the CIGAR
129137
extra = abs(pos - primer_pos)
130138
if verbose:
131-
print("extra %s" % (extra), file=sys.stderr)
139+
print(f"{segment.query_name}: extra {extra}", file=sys.stderr)
132140
if extra:
133141
if verbose:
134-
print("Inserted a %s, %s" % (0, extra), file=sys.stderr)
142+
print(
143+
f"{segment.query_name}: Inserted a 0, {extra}",
144+
file=sys.stderr,
145+
)
135146
if end:
136147
cigar.append((0, extra))
137148
else:
@@ -144,13 +155,16 @@ def trim(segment, primer_pos, end, verbose=False):
144155
# update the position of the leftmost mappinng base
145156
segment.pos = pos - extra
146157
if verbose:
147-
print("New pos: %s" % (segment.pos), file=sys.stderr)
158+
print(
159+
f"{segment.query_name}: New pos - {segment.pos}",
160+
file=sys.stderr,
161+
)
148162

149163
# if proposed softmask leads straight into a deletion, shuffle leftmost mapping base along and ignore the deletion
150164
if cigar[0][0] == 2:
151165
if verbose:
152166
print(
153-
"softmask created a leading deletion in the CIGAR, shuffling the alignment",
167+
f"{segment.query_name}: softmask created a leading deletion in the CIGAR, shuffling the alignment",
154168
file=sys.stderr,
155169
)
156170
while 1:
@@ -168,7 +182,13 @@ def trim(segment, primer_pos, end, verbose=False):
168182

169183
# check the new CIGAR and replace the old one
170184
if cigar[0][1] <= 0 or cigar[-1][1] <= 0:
171-
raise ("invalid cigar operation created - possibly due to INDEL in primer")
185+
if verbose:
186+
print(
187+
f"{segment.query_name}: invalid cigar operation created - possibly due to INDEL in primer",
188+
file=sys.stderr,
189+
)
190+
return
191+
172192
segment.cigartuples = cigar
173193
return
174194

@@ -195,16 +215,22 @@ def handle_segment(
195215
# filter out unmapped and supplementary alignment segments
196216
if segment.is_unmapped:
197217
if args.verbose:
198-
print("%s skipped as unmapped" % (segment.query_name), file=sys.stderr)
218+
print(
219+
f"{segment.query_name}: {segment.query_name} skipped as unmapped",
220+
file=sys.stderr,
221+
)
199222
return False
200223
if segment.is_supplementary:
201224
if args.verbose:
202-
print("%s skipped as supplementary" % (segment.query_name), file=sys.stderr)
225+
print(
226+
f"{segment.query_name}: {segment.query_name} skipped as supplementary",
227+
file=sys.stderr,
228+
)
203229
return False
204230
if segment.mapping_quality < min_mapq:
205231
if args.verbose:
206232
print(
207-
"%s skipped as mapping quality below threshold" % (segment.query_name),
233+
f"{segment.query_name}: skipped as mapping quality below threshold",
208234
file=sys.stderr,
209235
)
210236
return False
@@ -230,7 +256,7 @@ def handle_segment(
230256
if not p1 or not p2:
231257
if args.verbose:
232258
print(
233-
"%s skipped as no primer found for segment" % (segment.query_name),
259+
f"{segment.query_name}: skipped as no primer found for segment",
234260
file=sys.stderr,
235261
)
236262
return False
@@ -260,9 +286,9 @@ def handle_segment(
260286
"ReferenceEnd": segment.reference_end,
261287
"PrimerPair": f"{p1[2]['Primer_ID']}_{p2[2]['Primer_ID']}",
262288
"Primer1": p1[2]["Primer_ID"],
263-
"Primer1Start": abs(p1[1]),
289+
"Primer1Start": p1[2]["start"],
264290
"Primer2": p2[2]["Primer_ID"],
265-
"Primer2Start": abs(p2[1]),
291+
"Primer2Start": p2[2]["start"],
266292
"IsSecondary": segment.is_secondary,
267293
"IsSupplementary": segment.is_supplementary,
268294
"Start": p1[2]["start"],
@@ -275,16 +301,11 @@ def handle_segment(
275301
if args.remove_incorrect_pairs and not correctly_paired:
276302
if args.verbose:
277303
print(
278-
"%s skipped as not correctly paired" % (segment.query_name),
304+
f"{segment.query_name}: skipped as not correctly paired",
279305
file=sys.stderr,
280306
)
281307
return False
282308

283-
if args.verbose:
284-
# Dont screw with the order of the dict
285-
report_str = "\t".join(str(x) for x in report.values())
286-
print(report_str, file=sys.stderr)
287-
288309
# get the primer positions
289310
if args.trim_primers:
290311
p1_position = p1[2]["end"]
@@ -299,15 +320,12 @@ def handle_segment(
299320
trim(segment, p1_position, False, args.verbose)
300321
if args.verbose:
301322
print(
302-
"ref start %s >= primer_position %s"
303-
% (segment.reference_start, p1_position),
323+
f"{segment.query_name}: ref start {segment.reference_start} >= primer_position {p1_position}",
304324
file=sys.stderr,
305325
)
306326
except Exception as e:
307327
print(
308-
"problem soft masking left primer in {} (error: {}), skipping".format(
309-
segment.query_name, e
310-
),
328+
f"{segment.query_name}: problem soft masking left primer (error: {e}), skipping",
311329
file=sys.stderr,
312330
)
313331
return False
@@ -318,15 +336,12 @@ def handle_segment(
318336
trim(segment, p2_position, True, args.verbose)
319337
if args.verbose:
320338
print(
321-
"ref start %s >= primer_position %s"
322-
% (segment.reference_start, p2_position),
339+
f"{segment.query_name}: ref start {segment.reference_start} >= primer_position {p2_position}",
323340
file=sys.stderr,
324341
)
325342
except Exception as e:
326343
print(
327-
"problem soft masking right primer in {} (error: {}), skipping".format(
328-
segment.query_name, e
329-
),
344+
f"{segment.query_name}: problem soft masking right primer (error: {e}), skipping",
330345
file=sys.stderr,
331346
)
332347
return False
@@ -335,8 +350,7 @@ def handle_segment(
335350
if "M" not in segment.cigarstring:
336351
if args.verbose:
337352
print(
338-
"%s dropped as does not match reference post masking"
339-
% (segment.query_name),
353+
f"{segment.query_name}: dropped as does not match reference post masking",
340354
file=sys.stderr,
341355
)
342356
return False
@@ -366,9 +380,10 @@ def handle_paired_segment(
366380
segment1, segment2 = segments
367381

368382
if not segment1 or not segment2:
383+
segment = segment1 if segment1 else segment2
369384
if args.verbose:
370385
print(
371-
"Segment pair skipped as at least one segment in pair does not exist",
386+
f"{segment.query_name}: Pair skipped as at least one segment in pair does not exist",
372387
file=sys.stderr,
373388
)
374389
return False
@@ -377,24 +392,23 @@ def handle_paired_segment(
377392
if segment1.is_unmapped or segment2.is_unmapped:
378393
if args.verbose:
379394
print(
380-
"Segment pair: %s skipped as unmapped" % (segment1.query_name),
395+
f"{segment1.query_name}: Segment pair skipped as one of pair is unmapped",
381396
file=sys.stderr,
382397
)
383398
return False
384399

385400
if segment1.is_supplementary or segment2.is_supplementary:
386401
if args.verbose:
387402
print(
388-
"Segment pair: %s skipped as supplementary" % (segment1.query_name),
403+
f"{segment1.query_name}: Pair skipped as at least one of pair is supplementary",
389404
file=sys.stderr,
390405
)
391406
return False
392407

393408
if segment1.mapping_quality < min_mapq or segment2.mapping_quality < min_mapq:
394409
if args.verbose:
395410
print(
396-
"Segment pair: %s skipped as mapping quality below threshold"
397-
% (segment1.query_name),
411+
f"{segment1.query_name}: Pair skipped as at least one mapping quality of pair is below threshold",
398412
file=sys.stderr,
399413
)
400414
return False
@@ -418,8 +432,7 @@ def handle_paired_segment(
418432
if not p1 or not p2:
419433
if args.verbose:
420434
print(
421-
"Paired segment: %s skipped as no primer found for segment"
422-
% (segment1.query_name),
435+
f"{segment1.query_name}: Pair skipped as no primer found for at least one read in pair",
423436
file=sys.stderr,
424437
)
425438
return False
@@ -451,9 +464,9 @@ def handle_paired_segment(
451464
"ReferenceEnd": segment2.reference_end,
452465
"PrimerPair": f"{p1[2]['Primer_ID']}_{p2[2]['Primer_ID']}",
453466
"Primer1": p1[2]["Primer_ID"],
454-
"Primer1Start": abs(p1[1]),
467+
"Primer1Start": p1[2]["start"],
455468
"Primer2": p2[2]["Primer_ID"],
456-
"Primer2Start": abs(p2[1]),
469+
"Primer2Start": p2[2]["start"],
457470
"IsSecondary": segment1.is_secondary,
458471
"IsSupplementary": segment1.is_supplementary,
459472
"Start": p1[2]["start"],
@@ -466,17 +479,11 @@ def handle_paired_segment(
466479
if args.remove_incorrect_pairs and not correctly_paired:
467480
if args.verbose:
468481
print(
469-
"Paired segment: %s skipped as not correctly paired"
470-
% (segment1.query_name),
482+
f"{segment1.query_name}: Pair skipped due to primers not being correctly paired between reads of pair",
471483
file=sys.stderr,
472484
)
473485
return False
474486

475-
if args.verbose:
476-
# Dont screw with the order of the dict
477-
report_str = "\t".join(str(x) for x in report.values())
478-
print(report_str, file=sys.stderr)
479-
480487
# get the primer positions
481488
if args.trim_primers:
482489
p1_position = p1[2]["end"]
@@ -491,15 +498,27 @@ def handle_paired_segment(
491498
trim(segment1, p1_position, False, args.verbose)
492499
if args.verbose:
493500
print(
494-
"ref start %s >= primer_position %s"
495-
% (segment1.reference_start, p1_position),
501+
f"{segment1.query_name}: ref start {segment1.reference_start} >= primer_position {p1_position}",
496502
file=sys.stderr,
497503
)
498504
except Exception as e:
499505
print(
500-
"problem soft masking left primer in {} (error: {}), skipping".format(
501-
segment1.query_name, e
502-
),
506+
f"{segment1.query_name}: Problem soft masking left primer (error: {e}), skipping",
507+
file=sys.stderr,
508+
)
509+
return False
510+
511+
elif segment1.reference_end > p2_position:
512+
try:
513+
trim(segment1, p2_position, True, args.verbose)
514+
if args.verbose:
515+
print(
516+
f"{segment1.query_name}: ref_end {segment1.reference_end} >= primer_position {p2_position}",
517+
file=sys.stderr,
518+
)
519+
except Exception as e:
520+
print(
521+
f"{segment1.query_name}: Problem soft masking right primer (error: {e}), skipping",
503522
file=sys.stderr,
504523
)
505524
return False
@@ -510,15 +529,26 @@ def handle_paired_segment(
510529
trim(segment2, p2_position, True, args.verbose)
511530
if args.verbose:
512531
print(
513-
"ref start %s >= primer_position %s"
514-
% (segment2.reference_start, p2_position),
532+
f"{segment1.query_name}: ref_start {segment2.reference_start} >= primer_position {p2_position}",
515533
file=sys.stderr,
516534
)
517535
except Exception as e:
518536
print(
519-
"problem soft masking right primer in {} (error: {}), skipping".format(
520-
segment1.query_name, e
521-
),
537+
f"{segment1.query_name}: Problem soft masking right primer (error: {e}), skipping",
538+
file=sys.stderr,
539+
)
540+
return False
541+
elif segment2.reference_start < p1_position:
542+
try:
543+
trim(segment2, p1_position, False, args.verbose)
544+
if args.verbose:
545+
print(
546+
f"{segment1.query_name}: ref_end {segment2.reference_end} >= primer_position {p1_position}",
547+
file=sys.stderr,
548+
)
549+
except Exception as e:
550+
print(
551+
f"{segment1.query_name}: Problem soft masking left primer (error: {e}), skipping",
522552
file=sys.stderr,
523553
)
524554
return False
@@ -527,8 +557,7 @@ def handle_paired_segment(
527557
if "M" not in segment1.cigarstring or "M" not in segment2.cigarstring:
528558
if args.verbose:
529559
print(
530-
"Paired segment: %s dropped as does not match reference post masking"
531-
% (segment1.query_name),
560+
f"{segment1.query_name}: Paired segment dropped as does not match reference post masking",
532561
file=sys.stderr,
533562
)
534563
return False
@@ -613,7 +642,7 @@ def normalise(trimmed_segments: dict, normalise: int, bed: list, verbose: bool =
613642
for chrom, amplicon_dict in trimmed_segments.items():
614643
for amplicon, segments in amplicon_dict.items():
615644
if amplicon not in amplicons[chrom]:
616-
raise ValueError(f"Segment {amplicon} not found in primer scheme file")
645+
raise ValueError(f"Amplicon {amplicon} not found in primer scheme file")
617646

618647
desired_depth = np.full_like(
619648
(amplicons[chrom][amplicon]["length"],), normalise, dtype=int
@@ -952,8 +981,8 @@ def main():
952981
parser.add_argument(
953982
"--no-read-groups",
954983
dest="no_read_groups",
955-
action="store_true",
956984
help="Do not divide reads into groups in SAM output",
985+
action="store_true",
957986
)
958987
parser.add_argument("--verbose", action="store_true", help="Debug mode")
959988
parser.add_argument("--remove-incorrect-pairs", action="store_true")

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[metadata]
22
name = artic
3-
version = 1.7.1
3+
version = 1.7.4
44
author = Nick Loman
55
author_email = [email protected]
66
maintainer = Sam Wilkinson

0 commit comments

Comments
 (0)