Skip to content

Commit d2aad91

Browse files
author
Daniel Cooke
committed
Add filter_assigned_reads.py to scripts
1 parent c206da4 commit d2aad91

File tree

1 file changed

+50
-0
lines changed

1 file changed

+50
-0
lines changed

scripts/filter_assigned_reads.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
#!/usr/bin/env python3
2+
3+
import argparse
4+
import pysam as ps
5+
from pathlib import Path
6+
7+
def is_assigned_read(read):
8+
tags = dict(read.tags)
9+
return "HP" in tags and len(tags["HP"]) == 1
10+
11+
def filter_assigned_reads(in_bam_filename, out_bam_filename, region=None):
12+
in_bam = ps.AlignmentFile(in_bam_filename)
13+
out_bam = ps.AlignmentFile(out_bam_filename, 'wb', template=in_bam)
14+
contig, start, end = None, None, None
15+
if region is not None:
16+
if ':' in region:
17+
contig, rest = region.split(':')
18+
start, end = rest.split('-')
19+
start, end = start.replace(',', ''), end.replace(',', '')
20+
start, end = int(start), int(end)
21+
else:
22+
contig = region
23+
for read in in_bam.fetch(contig, start, end):
24+
if is_assigned_read(read):
25+
out_bam.write(read)
26+
out_bam.close()
27+
ps.index(str(out_bam_filename))
28+
29+
def main(args):
30+
if args.in_bam == args.out_bam:
31+
print("--out-bam cannot be the same as --in-bam")
32+
else:
33+
filter_assigned_reads(args.in_bam, args.out_bam, args.region)
34+
35+
if __name__ == '__main__':
36+
parser = argparse.ArgumentParser()
37+
parser.add_argument('-I','--in-bam',
38+
type=Path,
39+
required=True,
40+
help='Input Octopus realigned BAM')
41+
parser.add_argument('-O','--out-bam',
42+
type=Path,
43+
required=True,
44+
help='Output BAM')
45+
parser.add_argument('-T','--region',
46+
type=str,
47+
required=False,
48+
help='Region to filter')
49+
parsed, unparsed = parser.parse_known_args()
50+
main(parsed)

0 commit comments

Comments
 (0)