Skip to content

Commit a7ea4cc

Browse files
authored
Merge pull request #77 from bacpop/reads_in_hdf5
Add reads attribute to databases
2 parents 282dc5a + 6394872 commit a7ea4cc

File tree

5 files changed

+27
-11
lines changed

5 files changed

+27
-11
lines changed

CMakeLists.txt

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -45,8 +45,6 @@ if(DEFINED ENV{CONDA_PREFIX})
4545
include_directories($ENV{CONDA_PREFIX}/include)
4646
link_directories($ENV{CONDA_PREFIX}/lib)
4747
link_directories($ENV{CONDA_PREFIX}/lib/intel64)
48-
else()
49-
find_package(OpenMP)
5048
endif()
5149

5250
# Add libraries
@@ -148,6 +146,7 @@ if(DEFINED ENV{CONDA_PREFIX} AND (NOT APPLE OR CMAKE_COMPILER_IS_GNUCC OR ENV{SK
148146
target_link_libraries("${TARGET_NAME}" PRIVATE gomp z)
149147
else()
150148
target_link_libraries("${TARGET_NAME}" PRIVATE ZLIB::ZLIB)
149+
find_package(OpenMP)
151150
if(OpenMP_CXX_FOUND)
152151
target_link_libraries("${TARGET_NAME}" PRIVATE OpenMP::OpenMP_CXX)
153152
endif()

src/database/database.cpp

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -68,6 +68,8 @@ void Database::add_sketch(const Reference &ref)
6868
length_a.write(ref.seq_length());
6969
HighFive::Attribute missing_a = sketch_group.createAttribute<unsigned long int>("missing_bases", HighFive::DataSpace::From(ref.missing_bases()));
7070
missing_a.write(ref.missing_bases());
71+
HighFive::Attribute reads_a = sketch_group.createAttribute<bool>("reads", HighFive::DataSpace::From(ref.is_reads()));
72+
reads_a.write(ref.is_reads());
7173

7274
// Write base composition and k-mer length vectors as further group attributes
7375
const std::vector<double> bases = ref.base_composition();
@@ -117,6 +119,7 @@ Reference Database::load_sketch(const std::string &name)
117119
size_t seq_size = DEFAULT_LENGTH;
118120
std::vector<double> bases{0.25, 0.25, 0.25, 0.25};
119121
unsigned long int missing_bases = 0;
122+
bool reads = false;
120123
std::vector<std::string> attributes_keys = sketch_group.listAttributeNames();
121124
for (const auto &attr : attributes_keys)
122125
{
@@ -132,9 +135,13 @@ Reference Database::load_sketch(const std::string &name)
132135
{
133136
sketch_group.getAttribute("missing_bases").read(missing_bases);
134137
}
138+
else if (attr == "reads")
139+
{
140+
sketch_group.getAttribute("reads").read(reads);
141+
}
135142
}
136143

137-
Reference new_ref(name, bbits, sketchsize64, seq_size, bases, missing_bases);
144+
Reference new_ref(name, bbits, sketchsize64, seq_size, bases, missing_bases, reads);
138145
for (auto kmer_it = kmer_lengths.cbegin(); kmer_it != kmer_lengths.cend(); kmer_it++)
139146
{
140147
std::vector<uint64_t> usigs;

src/gpu/gpu_api.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -138,7 +138,8 @@ std::vector<Reference> create_sketches_cuda(
138138
sketches[i + j] =
139139
Reference(names[i + j], usigs, def_bbits, sketchsize64,
140140
seq_length, seq_in_batch[j]->get_composition(),
141-
seq_in_batch[j]->missing_bases(), use_rc, densified);
141+
seq_in_batch[j]->missing_bases(), use_rc, densified,
142+
seq_in_batch[j]->is_reads());
142143
sketch_db.add_sketch(sketches[i + j]);
143144
if (densified) {
144145
std::cerr << "NOTE: " << names[i + j] << " required densification"

src/reference.cpp

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@ Reference::Reference()
2323
: _bbits(def_bbits),
2424
_sketchsize64(def_sketchsize64),
2525
_use_rc(true),
26+
_reads(false),
2627
_seq_size(0),
2728
_densified(false) {}
2829

@@ -38,6 +39,7 @@ Reference::Reference(const std::string &name,
3839
_bbits(def_bbits),
3940
_sketchsize64(sketchsize64),
4041
_use_rc(use_rc),
42+
_reads(sequence.is_reads()),
4143
_seq_size(0),
4244
_densified(false)
4345
{
@@ -78,9 +80,11 @@ Reference::Reference(const std::string &name,
7880
const size_t sketchsize64,
7981
const size_t seq_size,
8082
const std::vector<double> bases,
81-
const unsigned long int missing_bases)
83+
const unsigned long int missing_bases,
84+
const bool reads)
8285
: _name(name), _bbits(bbits), _sketchsize64(sketchsize64), _use_rc(true),
83-
_seq_size(seq_size), _missing_bases(missing_bases), _densified(false)
86+
_reads(reads), _seq_size(seq_size), _missing_bases(missing_bases),
87+
_densified(false)
8488
{
8589
_bases.a = bases[0];
8690
_bases.c = bases[1];
@@ -97,10 +101,11 @@ Reference::Reference(const std::string &name,
97101
const BaseComp<double> &bases,
98102
const unsigned long int missing_bases,
99103
const bool use_rc,
100-
const bool densified)
104+
const bool densified,
105+
const bool reads)
101106
: _name(name), _bbits(bbits), _sketchsize64(sketchsize64), _use_rc(use_rc),
102-
_seq_size(seq_size), _missing_bases(missing_bases), _densified(densified),
103-
_bases(bases), usigs(sketch) {}
107+
_reads(reads), _seq_size(seq_size), _missing_bases(missing_bases),
108+
_densified(densified), _bases(bases), usigs(sketch) {}
104109

105110
double Reference::jaccard_dist(Reference &query, const int kmer_len, const double random_jaccard)
106111
{

src/reference.hpp

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,8 @@ class Reference
4444
const size_t sketchsize64,
4545
const size_t seq_size,
4646
const std::vector<double> bases,
47-
const unsigned long int missing_bases);
47+
const unsigned long int missing_bases,
48+
const bool reads);
4849

4950
// Initialise from GPU sketch
5051
Reference(const std::string &name,
@@ -55,7 +56,8 @@ class Reference
5556
const BaseComp<double> &bases,
5657
const unsigned long int missing_bases,
5758
const bool use_rc,
58-
const bool densified);
59+
const bool densified,
60+
const bool reads);
5961

6062
const std::vector<uint64_t> &get_sketch(const int kmer_len) const;
6163
void add_kmer_sketch(const std::vector<uint64_t> &sketch, const int kmer_len);
@@ -74,6 +76,7 @@ class Reference
7476
size_t seq_length() const { return _seq_size; }
7577
bool densified() const { return _densified; }
7678
bool rc() const { return _use_rc; }
79+
bool is_reads() const { return _reads; }
7780
std::vector<double> base_composition() const { return {_bases.a, _bases.c, _bases.g, _bases.t}; }
7881
unsigned long int missing_bases() const { return _missing_bases; }
7982

@@ -93,6 +96,7 @@ class Reference
9396
size_t _bbits;
9497
size_t _sketchsize64;
9598
bool _use_rc;
99+
bool _reads;
96100

97101
// Sequence statistics
98102
size_t _seq_size;

0 commit comments

Comments
 (0)