Skip to content

Commit 9c416f7

Browse files
authored
Merge pull request #72 from bacpop/v2.0.0_candidate
Update to v2: new CLI
2 parents e5b11eb + 1e8cb9b commit 9c416f7

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+1585
-1152
lines changed

CMakeLists.txt

Lines changed: 59 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -20,27 +20,33 @@ set(TARGET_NAME pp_sketchlib)
2020
add_compile_definitions(PYTHON_EXT)
2121

2222
# gcc: Add openmp
23-
# gcc: Add -O0 to remove optimizations when using debug
24-
IF(CMAKE_COMPILER_IS_GNUCC)
23+
IF(CMAKE_COMPILER_IS_GNUCC OR "$ENV{SKETCHLIB_INSTALL}" STREQUAL "conda")
2524
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
26-
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0")
27-
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0")
28-
ENDIF(CMAKE_COMPILER_IS_GNUCC)
25+
ENDIF()
26+
27+
# Add -O0 to remove optimizations when using debug
28+
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0")
29+
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0")
2930

3031
if(UNIX AND NOT APPLE)
3132
if(CMAKE_CXX_COMPILER STREQUAL "icpc")
32-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fast -xCASCADELAKE -DMKL_ILP64 -m64 -static-intel")
33+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fast -march=\"native\" -DMKL_ILP64 -m64 -static-intel")
3334
else()
3435
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS")
3536
set(CMAKE_LD_FLAGS "${CMAKE_LDFLAGS} -Wl,--as-needed")
3637
endif()
3738
endif()
3839

40+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
41+
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -ffast-math -funroll-loops -m64")
42+
3943
# Set paths for non standard lib/ and include/ locations
4044
if(DEFINED ENV{CONDA_PREFIX})
4145
include_directories($ENV{CONDA_PREFIX}/include)
4246
link_directories($ENV{CONDA_PREFIX}/lib)
4347
link_directories($ENV{CONDA_PREFIX}/lib/intel64)
48+
else()
49+
# No conda environment: ask CMake to locate OpenMP. The package is not
# REQUIRED, so OpenMP_CXX_FOUND has to be checked before linking against it.
find_package(OpenMP)
4450
endif()
4551

4652
# Add libraries
@@ -50,12 +56,12 @@ include_directories(${HDF5_INCLUDE_DIRS})
5056
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/vendor/highfive/include)
5157
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src)
5258

59+
find_package(ZLIB)
5360
execute_process(COMMAND pybind11-config --cmakedir OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE pybind11_DIR)
5461
find_package(pybind11 2.6 CONFIG REQUIRED)
5562
find_package(Eigen3 3.3 REQUIRED NO_MODULE)
5663
find_package(Armadillo REQUIRED)
5764
include_directories(${ARMADILLO_INCLUDE_DIRS})
58-
#find_package(OpenMP) # This links system openmp if present - conda sorts out rpath but take care
5965

6066
# Define python library target
6167
add_library("${TARGET_NAME}" MODULE)
@@ -64,42 +70,46 @@ add_library("${TARGET_NAME}" MODULE)
6470
include(CheckLanguage)
6571
check_language(CUDA)
6672
if(CMAKE_CUDA_COMPILER)
67-
message(STATUS "CUDA found, compiling both GPU and CPU code")
6873
enable_language(CUDA)
69-
70-
# PIC/relocatable-device-code needed as this is linked by gcc later
71-
# -Xptxas -dlcm=ca turns cache on, but not needed in recent nvcc versions
72-
# --cudart static: static linking of the CUDA libraries
73-
# -gencode arch=compute_35 etc compiles for each (minimum) device version listed (v3.5, v5.0, v7.5)
74-
set(CUDA_OPTS "-Xcompiler -fPIC -Xptxas -dlcm=ca --relocatable-device-code=true --expt-relaxed-constexpr")
75-
# Turn on link time optimisation if available
76-
if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER 11.0 AND CMAKE_BUILD_TYPE MATCHES Release)
77-
string(APPEND CUDA_OPTS " -dlto -arch=sm_86")
74+
if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER 11.0)
75+
message(STATUS "CUDA >11.0 found, compiling both GPU and CPU code")
76+
77+
# PIC/relocatable-device-code needed as this is linked by gcc later
78+
# -Xptxas -dlcm=ca turns cache on, but not needed in recent nvcc versions
79+
# --cudart static: static linking of the CUDA libraries
80+
# -gencode arch=compute_35 etc compiles for each (minimum) device version listed (v3.5, v5.0, v7.5)
81+
set(CUDA_OPTS "-Xcompiler -fPIC -Xptxas -dlcm=ca --relocatable-device-code=true --expt-relaxed-constexpr")
82+
# Turn on link time optimisation if available
83+
if(CMAKE_BUILD_TYPE MATCHES Release)
84+
string(APPEND CUDA_OPTS " -dlto -arch=sm_86")
85+
else()
86+
string(APPEND CUDA_OPTS " -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86")
87+
endif()
88+
if(CMAKE_BUILD_TYPE MATCHES Debug)
89+
string(APPEND CUDA_OPTS " -G")
90+
endif()
91+
92+
set(CMAKE_CUDA_FLAGS "${CUDA_OPTS}")
93+
94+
add_compile_definitions(GPU_AVAILABLE)
95+
add_library("${TARGET_NAME}_CUDA" OBJECT src/gpu/dist.cu
96+
src/gpu/sketch.cu
97+
src/gpu/device_memory.cu
98+
src/gpu/gpu_countmin.cu
99+
src/gpu/device_reads.cu)
100+
target_include_directories("${TARGET_NAME}_CUDA" PRIVATE "${EIGEN3_INCLUDE_DIR}" "${pybind11_INCLUDE_DIRS}")
101+
set_property(TARGET "${TARGET_NAME}_CUDA"
102+
PROPERTY POSITION_INDEPENDENT_CODE ON
103+
CUDA_SEPARABLE_COMPILATION ON
104+
CUDA_RESOLVE_DEVICE_SYMBOLS ON # try and ensure device link with nvcc
105+
CUDA_VISIBILITY_PRESET "hidden"
106+
CUDA_RUNTIME_LIBRARY Static)
107+
#CUDA_ARCHITECTURES OFF) # set off as done explicitly above (due to dlto complexities)
108+
# CPU code/gcc compiled code needed by cuda lib
109+
target_sources("${TARGET_NAME}" PRIVATE src/gpu/gpu_api.cpp)
78110
else()
79-
string(APPEND CUDA_OPTS " -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86")
80-
endif()
81-
if(CMAKE_BUILD_TYPE MATCHES Debug)
82-
string(APPEND CUDA_OPTS " -G")
111+
message(STATUS "CUDA >=11.0 required, compiling CPU code only")
83112
endif()
84-
85-
set(CMAKE_CUDA_FLAGS "${CUDA_OPTS}")
86-
87-
add_compile_definitions(GPU_AVAILABLE)
88-
add_library("${TARGET_NAME}_CUDA" OBJECT src/gpu/dist.cu
89-
src/gpu/sketch.cu
90-
src/gpu/device_memory.cu
91-
src/gpu/gpu_countmin.cu
92-
src/gpu/device_reads.cu)
93-
target_include_directories("${TARGET_NAME}_CUDA" PRIVATE "${EIGEN3_INCLUDE_DIR}" "${pybind11_INCLUDE_DIRS}")
94-
set_property(TARGET "${TARGET_NAME}_CUDA"
95-
PROPERTY POSITION_INDEPENDENT_CODE ON
96-
CUDA_SEPARABLE_COMPILATION ON
97-
CUDA_RESOLVE_DEVICE_SYMBOLS ON # try and ensure device link with nvcc
98-
CUDA_VISIBILITY_PRESET "hidden"
99-
CUDA_RUNTIME_LIBRARY Static)
100-
#CUDA_ARCHITECTURES OFF) # set off as done explicitly above (due to dlto complexities)
101-
# CPU code/gcc compiled code needed by cuda lib
102-
target_sources("${TARGET_NAME}" PRIVATE src/gpu/gpu_api.cpp)
103113
else()
104114
message(STATUS "CUDA not found, compiling CPU code only")
105115
endif()
@@ -133,7 +143,12 @@ if(CMAKE_CUDA_COMPILER)
133143
set_property(TARGET "${TARGET_NAME}" PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
134144
#set_property(TARGET "${TARGET_NAME}" PROPERTY CUDA_ARCHITECTURES OFF)
135145
endif()
136-
target_link_libraries("${TARGET_NAME}" PRIVATE pybind11::module Eigen3::Eigen z ${HDF5_LIBRARIES} gomp openblas lapack gfortran m dl)
137-
#if(OpenMP_CXX_FOUND)
138-
# target_link_libraries("${TARGET_NAME}" PRIVATE OpenMP::OpenMP_CXX)
139-
#endif()
146+
target_link_libraries("${TARGET_NAME}" PRIVATE pybind11::module Eigen3::Eigen ${HDF5_LIBRARIES} openblas lapack gfortran m dl)
147+
# Choose how zlib and OpenMP are linked into the Python module.
#  - Inside a conda environment (CONDA_PREFIX set), unless this is an Apple
#    build with a non-GNU compiler that was not requested with
#    SKETCHLIB_INSTALL=conda: link gomp and z by bare library name.
#  - Otherwise: link the imported ZLIB::ZLIB target, plus OpenMP::OpenMP_CXX
#    when OpenMP was found.
# The environment variable must be expanded with $ENV{...} and compared with
# STREQUAL. A bare ENV{...} is only a literal string inside if(), and EQUAL is
# a numeric comparison, so that spelling can never match "conda".
if(DEFINED ENV{CONDA_PREFIX} AND (NOT APPLE OR CMAKE_COMPILER_IS_GNUCC OR "$ENV{SKETCHLIB_INSTALL}" STREQUAL "conda"))
148+
target_link_libraries("${TARGET_NAME}" PRIVATE gomp z)
149+
else()
150+
target_link_libraries("${TARGET_NAME}" PRIVATE ZLIB::ZLIB)
151+
# OpenMP is optional here; only link it when the imported target exists.
if(OpenMP_CXX_FOUND)
152+
target_link_libraries("${TARGET_NAME}" PRIVATE OpenMP::OpenMP_CXX)
153+
endif()
154+
endif()

README.md

Lines changed: 25 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
# pp-sketchlib <img src='sketchlib_logo.png' align="right" height="139" />
22

33
<!-- badges: start -->
4-
[![Build status](https://dev.azure.com/jlees/pp-sketchlib/_apis/build/status/johnlees.pp-sketchlib?branchName=master)](https://dev.azure.com/jlees/pp-sketchlib/_build/latest?definitionId=1&branchName=master)
4+
[![Build Status](https://dev.azure.com/jlees/pp-sketchlib/_apis/build/status/bacpop.pp-sketchlib?branchName=master)](https://dev.azure.com/jlees/pp-sketchlib/_build/latest?definitionId=4&branchName=master)
5+
[![Build status](https://badge.buildkite.com/b1bc9ccd16211ca5a55846b95e297554e5aa3b544d8cb752b0.svg?branch=master;theme=github)](https://buildkite.com/mrc-ide/pp-sketchlib)
56
[![Anaconda package](https://anaconda.org/conda-forge/pp-sketchlib/badges/version.svg)](https://anaconda.org/conda-forge/pp-sketchlib)
67
<!-- badges: end -->
78

89

9-
Library of sketching functions used by [PopPUNK](https://www.poppunk.net>).
10+
Library of sketching functions used by [PopPUNK](https://www.poppunk.net). See documentation at http://poppunk.readthedocs.io/en/latest/sketching.html
1011

1112
## Installation
1213

@@ -67,7 +68,7 @@ installed (tested on 10.2 and 11.0).
6768
Create a set of sketches and save these as a database:
6869

6970
```
70-
poppunk_sketch --sketch --rfile rfiles.txt --ref-db listeria --sketch-size 10000 --cpus 4 --min-k 15 --k-step 2
71+
sketchlib sketch -l rfiles.txt -o listeria --cpus 4
7172
```
7273

7374
The input file `rfiles.txt` has one sequence per line. The first column is the sample name, subsequent tab-separated
@@ -79,52 +80,47 @@ sample2 sample2.fa
7980
sample3 sample3_1.fq.gz sample3_2.fq.gz
8081
```
8182

82-
Calculate core and accessory distances between databases with `--query`. If all-vs-all, only the upper triangle is calculated,
83+
Calculate core and accessory distances between databases with `query dist`. If all-vs-all, only the upper triangle is calculated,
8384
for example:
8485

8586
```
86-
poppunk_sketch --query --ref_db listeria --query_db listeria --cpus 4
87+
sketchlib query dist listeria --cpus 4
8788
```
8889

89-
This will save output files as a database for use with PopPUNK. If you wish to output the
90-
distances add the `--print` option:
91-
92-
```
93-
poppunk_sketch --query --ref_db listeria --query_db listeria --cpus 4 --print > distances.txt
94-
```
90+
This will print the distances to STDOUT, which can be captured with `>`. If you wish to save output files as a database for use with PopPUNK, add the `-o` option.
9591

9692
### Other options
9793

9894
Sketching:
9995

100-
- `--strand` ignores reverse complement k-mers, if input is all in the same sense
96+
- `--single-strand` ignores reverse complement k-mers, if input is all in the same sense
10197
- `--min-count` minimum k-mer count to include when using reads
10298
- `--exact-counter` uses a hash table to count k-mers, which is recommended for non-bacterial datasets.
10399

104100
Query:
105101

106102
- To only use some of the samples in the sketch database, you can add the `--subset` option with a file which lists the required sample names.
107-
- `--jaccard` will output the Jaccard distances, rather than core and accessory distances.
103+
- `query jaccard` will output the Jaccard distances, rather than core and accessory distances.
104+
- `query sparse` will output a sparse distance matrix,
105+
using either a `--threshold` or the k-nearest neighbours (`--kNN`).
108106

109107
### Large datasets
110108

111109
When working with large datasets, you can increase the `--cpus` to high numbers and get
112110
a roughly proportional performance increase.
113111

114112
If you are calculating sketches of read datasets, or large numbers of distances, and you have a CUDA compatible GPU,
115-
you can calculate distances on your graphics device even more quickly. Add the `--use-gpu` option:
113+
you can calculate distances on your graphics device even more quickly. Add the `--gpu` option with the desired
114+
device ID:
116115

117116
```
118-
poppunk_sketch --sketch --rfile rfiles.txt --ref-db listeria --cpus 4 --use-gpu
119-
poppunk_sketch --query --ref-db listeria --query-db listeria --use-gpu
117+
sketchlib sketch -l rfiles.txt -o listeria --cpus 4 --gpu 0
118+
sketchlib query dist listeria --gpu 0
120119
```
121120

122121
Both CPU parallelism and the GPU will be used, so be sure to add
123-
both `--cpus` and `--use-gpu` for maximum speed. This is particularly efficient
124-
when sketching.
125-
126-
You can set the `--gpu-id` if you have more than one device, which may be necessary on
127-
cluster systems. This mode can also benefit from having multiple CPU cores available too.
122+
both `--cpus` and `--gpu` for maximum speed. This is particularly efficient
123+
when sketching reads.
128124

129125
### Benchmarks
130126

@@ -193,12 +189,16 @@ contain `sketch` and may contain `random`. Run `h5dump` to see the full contents
193189
Contents are programmatically accessible with any HDF5 API. See `__main__.py` for an
194190
example in python.
195191

192+
See `poppunk_db_info` from the [PopPUNK](https://github.com/johnlees/PopPUNK) package for pretty printing.
193+
196194
#### sketch
197195

198196
Attributes:
199197

200198
- `sketch_version` - version of sketching code used to create the database.
201199
The SHA1 hash of relevant code files (doesn't change with every commit).
200+
- `codon_phased` - 1 if codon-phased seeds were used.
201+
- `reverse_complement` - 0 if `--single-strand`.
202202

203203
Contains a group for each sample, within each has attributes:
204204

@@ -230,78 +230,6 @@ Datasets:
230230
- `table_keys` - sample order of `table_values`.
231231
- `table_values` - centroid ID assigned to each sample.
232232

233-
C++
234-
---
235-
I have yet to set up a proper namespace for this, but you can include this
236-
code (`api.hpp` will do most functions) and use the parts you need. If you
237-
are interested in this becoming more functional, please raise an issue.
238-
239-
See `main.cpp` for examples:
240-
241-
```
242-
#include <fstream>
243-
#include <iostream>
244-
245-
#include "reference.hpp"
246-
#include "database.hpp"
247-
#include "random_match.hpp"
248-
#include "api.hpp"
249-
250-
// Set k-mer lengths
251-
std::vector<size_t> kmer_lengths {15, 17, 19, 21, 23, 25, 27, 29};
252-
253-
// Create a two sketches
254-
Reference ref(argv[1], {argv[2]}, kmer_lengths, 156, true, 0, false);
255-
Reference query(argv[3], {argv[4]}, kmer_lengths, 156, true, 0, false);
256-
257-
// Use default random match chances
258-
RandomMC random(true);
259-
260-
// Output some distances at a single k-mer length
261-
std::cout << ref.jaccard_dist(query, 15, random) << std::endl;
262-
std::cout << ref.jaccard_dist(query, 29, random) << std::endl;
263-
264-
// Calculate core and accessory distances between two sketches
265-
auto core_acc = ref.core_acc_dist<RandomMC>(query, random);
266-
std::cout << std::get<0>(core_acc) << "\t" << std::get<1>(core_acc) << std::endl;
267-
268-
// Save sketches to file
269-
Database sketch_db("sketch.h5");
270-
sketch_db.add_sketch(ref);
271-
sketch_db.add_sketch(query);
272-
273-
// Read sketches from file
274-
Reference ref_read = sketch_db.load_sketch(argv[1]);
275-
Reference query_read = sketch_db.load_sketch(argv[3]);
276-
// Create sketches using multiple threads, saving to file
277-
std::vector<Reference> ref_sketches = create_sketches("full",
278-
{argv[1], argv[3]},
279-
{{argv[2]}, {argv[4]}},
280-
kmer_lengths,
281-
156,
282-
true,
283-
0,
284-
false,
285-
2);
286-
// Calculate distances between sketches using multiple threads
287-
MatrixXf dists = query_db(ref_sketches,
288-
ref_sketches,
289-
kmer_lengths,
290-
random,
291-
false,
292-
2);
293-
std::cout << dists << std::endl;
294-
295-
// Read sketches from an existing database, using random access
296-
HighFive::File h5_db("listeria.h5");
297-
Database listeria_db(h5_db);
298-
std::vector<Reference> listeria_sketches;
299-
for (auto name_it = names.cbegin(); name_it != names.cend(); name_it++)
300-
{
301-
listeria_sketches.push_back(listeria_db.load_sketch(*name_it));
302-
}
303-
```
304-
305233
## Algorithms
306234

307235
### Sketching
@@ -363,7 +291,7 @@ Blais & Blanchette is used (formula 6 in the paper cited below).
363291
sketch each separately and join the databases.
364292
- GPU sketching filters out any read containing an N, which may give slightly
365293
different results from the CPU code.
366-
- GPU sketching with variable read lengths is untested, but theoretically supported.
294+
- GPU sketching with variable read lengths is unsupported. Illumina data only for now!
367295
- GPU distances use lower precision than the CPU code, so slightly different results
368296
are expected.
369297

@@ -427,6 +355,9 @@ Modifiers:
427355
- `PROFILE=1` runs with profiler flags for `ncu` and `nsys`
428356
- `GPU=1` also build CUDA code (assumes `/usr/local/cuda-11.1/` and SM v8.6)
429357

358+
### azure
359+
The repository key for the ubuntu CUDA install is periodically updated, which may cause build failures. See https://developer.nvidia.com/blog/updating-the-cuda-linux-gpg-repository-key/ and update in `azure-pipelines.yml`.
360+
430361
### Test that Python can build an installable package
431362

432363
Build a python source package and install it into an empty docker container with vanilla python 3. If this works, then there's a good chance that the version uploaded to pypi will work

azure-pipelines.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ steps:
2929
- script: |
3030
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-ubuntu1804.pin
3131
sudo mv cuda-ubuntu1804.pin /etc/apt/preferences.d/cuda-repository-pin-600
32-
sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub
32+
sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
3333
sudo add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/ /"
3434
sudo apt-get update
3535
sudo apt-get -y install cuda=11.2.2-1
@@ -54,5 +54,5 @@ steps:
5454
export CUDA_HOME=/usr/local/cuda-11.2
5555
export PATH=${CUDA_HOME}/bin${PATH:+:${PATH}}
5656
export LD_LIBRARY_PATH=${CUDA_HOME}/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
57-
cd test && python run_test.py --no-cpp
57+
cd test && python run_test.py
5858
displayName: 'Run tests (run_test.py)'

docker/test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,4 @@ HERE=$(dirname $0)
88
[ ! -z $(docker images -q $TAG_SHA) ] || docker pull $TAG_SHA
99

1010
## Just check that we can bring up the container and run something
11-
docker run -it -w /src --rm $TAG_SHA poppunk_sketch --version
11+
docker run -it -w /src --rm $TAG_SHA sketchlib --version

environment.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ dependencies:
99
- pip
1010
- numpy
1111
- scipy
12+
- docopt
1213
- cmake >= 3.12
1314
- pybind11
1415
- zlib
@@ -21,4 +22,4 @@ dependencies:
2122
- armadillo
2223
- libgfortran-ng
2324
- nvcc_linux-64
24-
- cudatoolkit==11.2
25+
- cudatoolkit==11.2 # This is pinned due to version install on azure, see azure-pipelines.yml

pp_sketch/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,4 @@
33

44
'''PopPUNK sketching functions'''
55

6-
__version__ = '1.7.6.2'
6+
__version__ = '2.0.0'

0 commit comments

Comments
 (0)