|
| 1 | +#!/usr/bin/env python3 |
| 2 | + |
| 3 | +import argparse |
| 4 | +import pysam as ps |
| 5 | +from pathlib import Path |
| 6 | + |
| 7 | +def is_assigned_read(read): |
| 8 | + tags = dict(read.tags) |
| 9 | + return "HP" in tags and len(tags["HP"]) == 1 |
| 10 | + |
| 11 | +def filter_assigned_reads(in_bam_filename, out_bam_filename, region=None): |
| 12 | + in_bam = ps.AlignmentFile(in_bam_filename) |
| 13 | + out_bam = ps.AlignmentFile(out_bam_filename, 'wb', template=in_bam) |
| 14 | + contig, start, end = None, None, None |
| 15 | + if region is not None: |
| 16 | + if ':' in region: |
| 17 | + contig, rest = region.split(':') |
| 18 | + start, end = rest.split('-') |
| 19 | + start, end = start.replace(',', ''), end.replace(',', '') |
| 20 | + start, end = int(start), int(end) |
| 21 | + else: |
| 22 | + contig = region |
| 23 | + for read in in_bam.fetch(contig, start, end): |
| 24 | + if is_assigned_read(read): |
| 25 | + out_bam.write(read) |
| 26 | + out_bam.close() |
| 27 | + ps.index(str(out_bam_filename)) |
| 28 | + |
| 29 | +def main(args): |
| 30 | + if args.in_bam == args.out_bam: |
| 31 | + print("--out-bam cannot be the same as --in-bam") |
| 32 | + else: |
| 33 | + filter_assigned_reads(args.in_bam, args.out_bam, args.region) |
| 34 | + |
| 35 | +if __name__ == '__main__': |
| 36 | + parser = argparse.ArgumentParser() |
| 37 | + parser.add_argument('-I','--in-bam', |
| 38 | + type=Path, |
| 39 | + required=True, |
| 40 | + help='Input Octopus realigned BAM') |
| 41 | + parser.add_argument('-O','--out-bam', |
| 42 | + type=Path, |
| 43 | + required=True, |
| 44 | + help='Output BAM') |
| 45 | + parser.add_argument('-T','--region', |
| 46 | + type=str, |
| 47 | + required=False, |
| 48 | + help='Region to filter') |
| 49 | + parsed, unparsed = parser.parse_known_args() |
| 50 | + main(parsed) |
0 commit comments