Skip to content

Commit 0b7decb

Browse files
committed
Add noeof option
1 parent 600907a commit 0b7decb

20 files changed

+342
-21
lines changed

src/FastQValidator.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "StringHash.h"
2222
#include "Parameters.h"
2323
#include "FastQFile.h"
24+
#include "BgzfFileType.h"
2425

2526
int main(int argc, char ** argv)
2627
{
@@ -40,6 +41,7 @@ int main(int argc, char ** argv)
4041
bool baseComposition = false;
4142
bool avgQual = false;
4243
bool quiet = false;
44+
bool noeof = false;
4345
bool params = false;
4446
bool disableSeqIDCheck = false;
4547
bool interleaved = false;
@@ -50,6 +52,7 @@ int main(int argc, char ** argv)
5052
LONG_PARAMETER("avgQual", &avgQual)
5153
LONG_PARAMETER("disableSeqIDCheck", &disableSeqIDCheck)
5254
LONG_PARAMETER("interleaved", &interleaved)
55+
LONG_PARAMETER("noeof", &noeof)
5356
LONG_PARAMETER("quiet", &quiet)
5457
LONG_PARAMETER("params", &params)
5558
LONG_INTPARAMETER("minReadLen", &minReadLength)
@@ -101,6 +104,14 @@ int main(int argc, char ** argv)
101104
autoDetect = true;
102105
}
103106

107+
// If no eof block is required for a bgzf file, set the bgzf file type to
108+
// not look for it.
109+
if(noeof)
110+
{
111+
// Set that the eof block is not required.
112+
BgzfFileType::setRequireEofBlock(false);
113+
}
114+
104115
// Do not print status if set to quiet.
105116
if((!quiet) && params)
106117
{

test/FastQValidatorTest.sh

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,38 @@ then
1515
ERROR=true
1616
fi
1717

18+
# Run on gz file
19+
$PATH_TO_EXE/fastQValidator --params --file testFile.txt.gz --minReadLen 10 --auto --printableErrors 100 --baseComposition > results/runResultsGZ.txt 2>&1
20+
diff results/runResultsGZ.txt expectedResults/ExpectedResultsAutoDetectGZ.txt
21+
if [ $? -ne 0 ]
22+
then
23+
ERROR=true
24+
fi
25+
26+
# Run on bgzf file
27+
$PATH_TO_EXE/fastQValidator --params --file testFile.txt.bgz --minReadLen 10 --auto --printableErrors 100 --baseComposition > results/runResultsBGZ.txt 2>&1
28+
diff results/runResultsBGZ.txt expectedResults/ExpectedResultsAutoDetectBGZ.txt
29+
if [ $? -ne 0 ]
30+
then
31+
ERROR=true
32+
fi
33+
34+
# Run on bgzf file with no eof causing failure
35+
$PATH_TO_EXE/fastQValidator --params --file testFile.txt.noeof.bgz --minReadLen 10 --auto --printableErrors 100 --baseComposition > results/runResultsBGZnoeofFail.txt 2>&1
36+
diff results/runResultsBGZnoeofFail.txt expectedResults/ExpectedResultsAutoDetectBGZnoeofFail.txt
37+
if [ $? -ne 0 ]
38+
then
39+
ERROR=true
40+
fi
41+
42+
# Run on bgzf file with no eof but skipping the eof check
43+
$PATH_TO_EXE/fastQValidator --noeof --params --file testFile.txt.noeof.bgz --minReadLen 10 --auto --printableErrors 100 --baseComposition > results/runResultsBGZnoeof.txt 2>&1
44+
diff results/runResultsBGZnoeof.txt expectedResults/ExpectedResultsAutoDetectBGZnoeof.txt
45+
if [ $? -ne 0 ]
46+
then
47+
ERROR=true
48+
fi
49+
1850
# Run on the same file but do not check for unique sequence id.
1951
$PATH_TO_EXE/fastQValidator --params --file testFile.txt --minReadLen 10 --auto --printableErrors 100 --baseComposition --disableSeqIDCheck > results/runResultsDisableSeqID.txt 2>&1
2052
diff results/runResultsDisableSeqID.txt expectedResults/ExpectedResultsDisableSeqID.txt

test/expectedResults/ExpectedResults0Errors.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11

22
Input Parameters
33
--file [testFile.txt], --baseComposition [ON], --avgQual, --disableSeqIDCheck,
4-
--interleaved, --quiet, --params [ON], --minReadLen [10],
5-
--maxErrors
4+
--interleaved, --noeof, --quiet, --params [ON],
5+
--minReadLen [10], --maxErrors
66
Space Type : --baseSpace [ON], --colorSpace, --auto
77
Errors : --ignoreErrors, --printableErrors [100]
88

test/expectedResults/ExpectedResults10Errors.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11

22
Input Parameters
33
--file [testFile.txt], --baseComposition [ON], --avgQual, --disableSeqIDCheck,
4-
--interleaved, --quiet, --params [ON], --minReadLen [10],
5-
--maxErrors [10]
4+
--interleaved, --noeof, --quiet, --params [ON],
5+
--minReadLen [10], --maxErrors [10]
66
Space Type : --baseSpace [ON], --colorSpace, --auto
77
Errors : --ignoreErrors, --printableErrors [100]
88

test/expectedResults/ExpectedResults10Errors5Report.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11

22
Input Parameters
33
--file [testFile.txt], --baseComposition [ON], --avgQual, --disableSeqIDCheck,
4-
--interleaved, --quiet, --params [ON], --minReadLen [10],
5-
--maxErrors [10]
4+
--interleaved, --noeof, --quiet, --params [ON],
5+
--minReadLen [10], --maxErrors [10]
66
Space Type : --baseSpace [ON], --colorSpace, --auto
77
Errors : --ignoreErrors, --printableErrors [5]
88

test/expectedResults/ExpectedResultsAutoDetect.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11

22
Input Parameters
33
--file [testFile.txt], --baseComposition [ON], --avgQual, --disableSeqIDCheck,
4-
--interleaved, --quiet, --params [ON], --minReadLen [10],
5-
--maxErrors [-1]
4+
--interleaved, --noeof, --quiet, --params [ON],
5+
--minReadLen [10], --maxErrors [-1]
66
Space Type : --baseSpace, --colorSpace, --auto [ON]
77
Errors : --ignoreErrors, --printableErrors [100]
88

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
2+
Input Parameters
3+
--file [testFile.txt.bgz], --baseComposition [ON], --avgQual,
4+
--disableSeqIDCheck, --interleaved, --noeof, --quiet,
5+
--params [ON], --minReadLen [10], --maxErrors [-1]
6+
Space Type : --baseSpace, --colorSpace, --auto [ON]
7+
Errors : --ignoreErrors, --printableErrors [100]
8+
9+
ERROR on Line 2: Invalid character ('.') in base sequence.
10+
ERROR on Line 2: Invalid character ('0') in base sequence.
11+
ERROR on Line 2: Invalid character ('1') in base sequence.
12+
ERROR on Line 2: Invalid character ('2') in base sequence.
13+
ERROR on Line 2: Invalid character ('3') in base sequence.
14+
ERROR on Line 11: Invalid character ('1') in base sequence.
15+
ERROR on Line 11: Invalid character ('2') in base sequence.
16+
ERROR on Line 11: Invalid character ('3') in base sequence.
17+
ERROR on Line 11: Invalid character ('.') in base sequence.
18+
ERROR on Line 11: Invalid character ('0') in base sequence.
19+
ERROR on Line 11: Invalid character ('3') in base sequence.
20+
ERROR on Line 11: Invalid character ('2') in base sequence.
21+
ERROR on Line 11: Invalid character ('1') in base sequence.
22+
ERROR on Line 11: Invalid character ('.') in base sequence.
23+
ERROR on Line 11: Invalid character ('0') in base sequence.
24+
ERROR on Line 11: Invalid character ('1') in base sequence.
25+
ERROR on Line 11: Invalid character ('1') in base sequence.
26+
ERROR on Line 25: The sequence identifier line was too short.
27+
ERROR on Line 29: First line of a sequence does not begin with @
28+
ERROR on Line 33: No Sequence Identifier specified before the comment.
29+
ERROR on Line 37: No Sequence Identifier specified before the comment.
30+
ERROR on Line 41: Repeated Sequence Identifier: Valid at Lines 1 and 41
31+
ERROR on Line 46: Invalid character ('H') in base sequence.
32+
ERROR on Line 46: Invalid character ('0') in base sequence.
33+
ERROR on Line 47: Invalid character ('B') in base sequence.
34+
ERROR on Line 47: Invalid character ('Z') in base sequence.
35+
ERROR on Line 52: Raw Sequence is shorter than the min read length: 3 < 10
36+
ERROR on Line 56: Looking for continuation of Raw Sequence or '+' instead found a blank line, assuming it was part of Raw Sequence.
37+
ERROR on Line 57: Looking for continuation of Raw Sequence or '+' instead found a blank line, assuming it was part of Raw Sequence.
38+
ERROR on Line 63: Invalid character (' ') in quality string.
39+
ERROR on Line 64: Invalid character (' ') in quality string.
40+
ERROR on Line 77: Quality string length (12) does not equal raw sequence length (10)
41+
ERROR on Line 88: Sequence Identifier on '+' line does not equal the one on the '@' line.
42+
ERROR on Line 91: Invalid character ('0') in base sequence.
43+
ERROR on Line 91: Invalid character ('1') in base sequence.
44+
ERROR on Line 91: Invalid character ('2') in base sequence.
45+
ERROR on Line 91: Invalid character ('3') in base sequence.
46+
ERROR on Line 91: Invalid character ('.') in base sequence.
47+
ERROR on Line 91: Invalid character ('0') in base sequence.
48+
ERROR on Line 91: Invalid character ('3') in base sequence.
49+
ERROR on Line 91: Invalid character ('2') in base sequence.
50+
ERROR on Line 91: Invalid character ('1') in base sequence.
51+
ERROR on Line 91: Invalid character ('.') in base sequence.
52+
ERROR on Line 91: Invalid character ('0') in base sequence.
53+
ERROR on Line 91: Invalid character ('1') in base sequence.
54+
ERROR on Line 91: Invalid character ('1') in base sequence.
55+
ERROR on Line 95: Reached the end of the file without a '+' line.
56+
ERROR on Line 95: Incomplete Sequence, missing Quality String.
57+
58+
Base Composition Statistics:
59+
Read Index %A %C %G %T %N Total Reads At Index
60+
0 100.00 0.00 0.00 0.00 0.00 20
61+
1 5.26 94.74 0.00 0.00 0.00 19
62+
2 5.26 0.00 0.00 94.74 0.00 19
63+
3 0.00 0.00 94.44 0.00 5.56 18
64+
4 50.00 38.89 0.00 0.00 11.11 18
65+
5 52.94 47.06 0.00 0.00 0.00 17
66+
6 0.00 50.00 0.00 50.00 0.00 18
67+
7 0.00 0.00 38.89 11.11 50.00 18
68+
8 44.44 0.00 5.56 0.00 50.00 18
69+
9 0.00 88.24 11.76 0.00 0.00 17
70+
10 11.11 0.00 0.00 88.89 0.00 9
71+
11 0.00 0.00 0.00 0.00 100.00 8
72+
12 0.00 0.00 100.00 0.00 0.00 8
73+
13 100.00 0.00 0.00 0.00 0.00 4
74+
14 33.33 66.67 0.00 0.00 0.00 3
75+
15 0.00 33.33 0.00 66.67 0.00 3
76+
16 0.00 0.00 100.00 0.00 0.00 2
77+
17 100.00 0.00 0.00 0.00 0.00 2
78+
18 0.00 100.00 0.00 0.00 0.00 2
79+
19 0.00 0.00 0.00 100.00 0.00 2
80+
20 0.00 0.00 0.00 0.00 100.00 2
81+
21 100.00 0.00 0.00 0.00 0.00 2
82+
22 0.00 100.00 0.00 0.00 0.00 2
83+
23 0.00 0.00 0.00 100.00 0.00 2
84+
24 0.00 0.00 0.00 0.00 100.00 2
85+
25 0.00 0.00 100.00 0.00 0.00 2
86+
87+
Finished processing testFile.txt.bgz with 95 lines containing 21 sequences.
88+
There were a total of 48 errors.
89+
Returning: 1 : FASTQ_INVALID
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
2+
Input Parameters
3+
--file [testFile.txt.noeof.bgz], --baseComposition [ON], --avgQual,
4+
--disableSeqIDCheck, --interleaved, --noeof [ON], --quiet,
5+
--params [ON], --minReadLen [10], --maxErrors [-1]
6+
Space Type : --baseSpace, --colorSpace, --auto [ON]
7+
Errors : --ignoreErrors, --printableErrors [100]
8+
9+
ERROR on Line 2: Invalid character ('.') in base sequence.
10+
ERROR on Line 2: Invalid character ('0') in base sequence.
11+
ERROR on Line 2: Invalid character ('1') in base sequence.
12+
ERROR on Line 2: Invalid character ('2') in base sequence.
13+
ERROR on Line 2: Invalid character ('3') in base sequence.
14+
ERROR on Line 11: Invalid character ('1') in base sequence.
15+
ERROR on Line 11: Invalid character ('2') in base sequence.
16+
ERROR on Line 11: Invalid character ('3') in base sequence.
17+
ERROR on Line 11: Invalid character ('.') in base sequence.
18+
ERROR on Line 11: Invalid character ('0') in base sequence.
19+
ERROR on Line 11: Invalid character ('3') in base sequence.
20+
ERROR on Line 11: Invalid character ('2') in base sequence.
21+
ERROR on Line 11: Invalid character ('1') in base sequence.
22+
ERROR on Line 11: Invalid character ('.') in base sequence.
23+
ERROR on Line 11: Invalid character ('0') in base sequence.
24+
ERROR on Line 11: Invalid character ('1') in base sequence.
25+
ERROR on Line 11: Invalid character ('1') in base sequence.
26+
ERROR on Line 25: The sequence identifier line was too short.
27+
ERROR on Line 29: First line of a sequence does not begin with @
28+
ERROR on Line 33: No Sequence Identifier specified before the comment.
29+
ERROR on Line 37: No Sequence Identifier specified before the comment.
30+
ERROR on Line 41: Repeated Sequence Identifier: Valid at Lines 1 and 41
31+
ERROR on Line 46: Invalid character ('H') in base sequence.
32+
ERROR on Line 46: Invalid character ('0') in base sequence.
33+
ERROR on Line 47: Invalid character ('B') in base sequence.
34+
ERROR on Line 47: Invalid character ('Z') in base sequence.
35+
ERROR on Line 52: Raw Sequence is shorter than the min read length: 3 < 10
36+
ERROR on Line 56: Looking for continuation of Raw Sequence or '+' instead found a blank line, assuming it was part of Raw Sequence.
37+
ERROR on Line 57: Looking for continuation of Raw Sequence or '+' instead found a blank line, assuming it was part of Raw Sequence.
38+
ERROR on Line 63: Invalid character (' ') in quality string.
39+
ERROR on Line 64: Invalid character (' ') in quality string.
40+
ERROR on Line 77: Quality string length (12) does not equal raw sequence length (10)
41+
ERROR on Line 88: Sequence Identifier on '+' line does not equal the one on the '@' line.
42+
ERROR on Line 91: Invalid character ('0') in base sequence.
43+
ERROR on Line 91: Invalid character ('1') in base sequence.
44+
ERROR on Line 91: Invalid character ('2') in base sequence.
45+
ERROR on Line 91: Invalid character ('3') in base sequence.
46+
ERROR on Line 91: Invalid character ('.') in base sequence.
47+
ERROR on Line 91: Invalid character ('0') in base sequence.
48+
ERROR on Line 91: Invalid character ('3') in base sequence.
49+
ERROR on Line 91: Invalid character ('2') in base sequence.
50+
ERROR on Line 91: Invalid character ('1') in base sequence.
51+
ERROR on Line 91: Invalid character ('.') in base sequence.
52+
ERROR on Line 91: Invalid character ('0') in base sequence.
53+
ERROR on Line 91: Invalid character ('1') in base sequence.
54+
ERROR on Line 91: Invalid character ('1') in base sequence.
55+
ERROR on Line 95: Reached the end of the file without a '+' line.
56+
ERROR on Line 95: Incomplete Sequence, missing Quality String.
57+
58+
Base Composition Statistics:
59+
Read Index %A %C %G %T %N Total Reads At Index
60+
0 100.00 0.00 0.00 0.00 0.00 20
61+
1 5.26 94.74 0.00 0.00 0.00 19
62+
2 5.26 0.00 0.00 94.74 0.00 19
63+
3 0.00 0.00 94.44 0.00 5.56 18
64+
4 50.00 38.89 0.00 0.00 11.11 18
65+
5 52.94 47.06 0.00 0.00 0.00 17
66+
6 0.00 50.00 0.00 50.00 0.00 18
67+
7 0.00 0.00 38.89 11.11 50.00 18
68+
8 44.44 0.00 5.56 0.00 50.00 18
69+
9 0.00 88.24 11.76 0.00 0.00 17
70+
10 11.11 0.00 0.00 88.89 0.00 9
71+
11 0.00 0.00 0.00 0.00 100.00 8
72+
12 0.00 0.00 100.00 0.00 0.00 8
73+
13 100.00 0.00 0.00 0.00 0.00 4
74+
14 33.33 66.67 0.00 0.00 0.00 3
75+
15 0.00 33.33 0.00 66.67 0.00 3
76+
16 0.00 0.00 100.00 0.00 0.00 2
77+
17 100.00 0.00 0.00 0.00 0.00 2
78+
18 0.00 100.00 0.00 0.00 0.00 2
79+
19 0.00 0.00 0.00 100.00 0.00 2
80+
20 0.00 0.00 0.00 0.00 100.00 2
81+
21 100.00 0.00 0.00 0.00 0.00 2
82+
22 0.00 100.00 0.00 0.00 0.00 2
83+
23 0.00 0.00 0.00 100.00 0.00 2
84+
24 0.00 0.00 0.00 0.00 100.00 2
85+
25 0.00 0.00 100.00 0.00 0.00 2
86+
87+
Finished processing testFile.txt.noeof.bgz with 95 lines containing 21 sequences.
88+
There were a total of 48 errors.
89+
Returning: 1 : FASTQ_INVALID
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
2+
Input Parameters
3+
--file [testFile.txt.noeof.bgz], --baseComposition [ON], --avgQual,
4+
--disableSeqIDCheck, --interleaved, --noeof, --quiet,
5+
--params [ON], --minReadLen [10], --maxErrors [-1]
6+
Space Type : --baseSpace, --colorSpace, --auto [ON]
7+
Errors : --ignoreErrors, --printableErrors [100]
8+
9+
BGZF EOF marker is missing in testFile.txt.noeof.bgz
10+
ERROR: Failed to open file: testFile.txt.noeof.bgz
11+
Returning: 3 : FASTQ_OPEN_ERROR

0 commit comments

Comments
 (0)