|
# Toolchain. CXX has to be the C++ driver: $(LINK.cpp) links with $(CXX), and
# the plain gcc driver does not add the C++ standard library to the link line.
CXX=g++-11
CC=gcc-11

# Warnings and position-independent code for every C and C++ object.
CFLAGS+=-Wall -Wextra -fPIC
CXXFLAGS+=-Wall -Wextra -std=c++17 -fopenmp -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -fPIC

# Build mode, selected on the command line:
#   make DEBUG=1    no optimisation, debug info for host and device code
#   make PROFILE=1  -O2 with debug/profiling info
#   (default)       -O3 tuned for the build machine (-march=native)
# CFLAGS follows CXXFLAGS in every mode so C objects are built the same way
# as C++ objects.
ifdef DEBUG
  CXXFLAGS+= -O0 -g
  CFLAGS+= -O0 -g
  CUDAFLAGS = -g -G
else ifdef PROFILE
  CXXFLAGS+= -O2 -g -flto -fno-fat-lto-objects -fvisibility=hidden
  CFLAGS+= -O2 -g -flto -fno-fat-lto-objects -fvisibility=hidden
  CUDAFLAGS = -O2 -pg -lineinfo
else
  CXXFLAGS+= -march=native -O3 -flto -fno-fat-lto-objects -fvisibility=hidden
  CFLAGS+= -march=native -O3 -flto -fno-fat-lto-objects -fvisibility=hidden
  CUDAFLAGS+= -O3
endif
| 16 | + |
# Platform detection; evaluated once at parse time.
UNAME_S := $(shell uname -s)
# Root of the active conda environment. Headers are read from
# $(LIBLOC)/include and libraries from $(LIBLOC)/lib.
LIBLOC = ${CONDA_PREFIX}
# Libraries needed on every platform.
LDLIBS = -lz -lhdf5_cpp -lhdf5 -lopenblas -lgomp
ifeq ($(UNAME_S),Linux)
  CXXFLAGS+= -m64
  ifdef PROFILE
    # Linker option, forwarded with -Wl; only added for PROFILE builds.
    CXXFLAGS+= -Wl,--compress-debug-sections=none
  endif
  LDLIBS+= -lpthread -lgfortran -lm -ldl -lrt
  # Append (not assign) so an LDFLAGS value inherited from the environment is
  # kept, the same way CFLAGS/CXXFLAGS/CPPFLAGS are extended with +=.
  LDFLAGS+= -Wl,--as-needed
endif
ifeq ($(UNAME_S),Darwin)
  LDLIBS+= -pthread
endif
| 31 | + |
# Install prefixes of the external toolchains. Both can be overridden from the
# environment or the command line; the defaults are the locations this file
# previously hard-coded.
BREW_PREFIX ?= /home/linuxbrew/.linuxbrew
CUDA_HOME ?= /usr/local/cuda-12.3

# Header and library search paths for host code.
CPPFLAGS+=-I"$(BREW_PREFIX)/include" -I"." -I"../vendor/highfive/include" -I$(LIBLOC)/include -I$(LIBLOC)/include/eigen3
LDFLAGS+= -L$(LIBLOC)/lib -L"$(BREW_PREFIX)/lib" -L$(CUDA_HOME)/lib64
# CUDA runtime libraries, followed by everything the host link needs.
CUDA_LDLIBS=-lcudadevrt -lcudart_static $(LDLIBS)

CUDA_LDFLAGS =-L$(LIBLOC)/lib -L$(CUDA_HOME)/targets/x86_64-linux/lib/stubs -L$(CUDA_HOME)/targets/x86_64-linux/lib
# nvcc options: host compiler, static CUDA runtime, relocatable device code,
# and device code for sm_70 and sm_75.
CUDAFLAGS +=-ccbin $(BREW_PREFIX)/bin/g++-11 -std=c++17 -Xcompiler -fPIC --cudart static --relocatable-device-code=true --expt-relaxed-constexpr -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75
# make GPU=1: define GPU_AVAILABLE for host code and also generate sm_86 code.
ifdef GPU
  CXXFLAGS += -DGPU_AVAILABLE
  CUDAFLAGS += -gencode arch=compute_86,code=sm_86
  CUDA_LDFLAGS += -L$(CUDA_HOME)/lib64
endif
| 43 | + |
# File name of the Python extension module. Simply-expanded (:=) so that
# python3-config runs once at parse time instead of on every expansion.
PYTHON_LIB := pp_sketchlib$(shell python3-config --extension-suffix)

# python specific options
python: CPPFLAGS += -DGPU_AVAILABLE -DPYTHON_EXT -DNDEBUG -Dpp_sketchlib_EXPORTS $(shell python3 -m pybind11 --includes)
| 48 | + |
# Executables built by `all` and copied by `install`.
PROGRAMS = sketch_test matrix_test read_test gpu_dist_test

# Host objects linked into every program and into the Python module.
SKETCH_OBJS = \
  dist/dist.o \
  dist/matrix_ops.o \
  reference.o \
  sketch/seqio.o \
  sketch/sketch.o \
  database/database.o \
  sketch/countmin.o \
  api.o \
  dist/linear_regression.o \
  random/rng.o \
  random/random_match.o \
  random/kmeans/KMeansRexCore.o \
  random/kmeans/mersenneTwister2002.o

# Host-side GPU API object.
GPU_SKETCH_OBJS = gpu/gpu_api.o

# Objects compiled from the .cu sources by nvcc.
CUDA_OBJS = \
  gpu/dist.cu.o \
  gpu/sketch.cu.o \
  gpu/device_reads.cu.o \
  gpu/gpu_countmin.cu.o \
  gpu/device_memory.cu.o
| 54 | + |
# Web build: the settings below apply only when making the `web` target
# (and, as target-specific variables, its prerequisites).
web: CXX = em++
# Optimised compile options.
#  - Exceptions are switched off here; turn them back on for testing.
#  - `--closure 1` can be added to shrink the js file, but note that it
#    minifies variable names.
web: CXXFLAGS = -O3 -s ASSERTIONS=1 -DNOEXCEPT -DJSON_NOEXCEPTION \
                -s DISABLE_EXCEPTION_CATCHING=1 -fno-exceptions \
                -flto --bind -s STRICT=1 \
                -s ALLOW_MEMORY_GROWTH=1 -s USE_ZLIB=1 -s MODULARIZE=1 \
                -s "EXPORTED_FUNCTIONS=['_malloc']" \
                -s 'EXPORTED_RUNTIME_METHODS=["FS"]' \
                -s EXPORT_NAME=WebSketch \
                -Wall -Wextra -std=c++14
web: CPPFLAGS += -DWEB_SKETCH
web: LDFLAGS = -lnodefs.js -lworkerfs.js

# Base name of the generated web files, and the list of those files.
WEB_OUT = web/web_sketch
WEB_OBJS = $(WEB_OUT).js $(WEB_OUT).html $(WEB_OUT).wasm

# Link the web objects into $(WEB_OUT).js, then insert an eslint-disable
# comment at the start of its first line. sed keeps the unedited file next to
# it with a .old suffix.
web: web/web_sketch.o sketch/seqio.o sketch/sketch.o sketch/countmin.o
	$(LINK.cpp) $^ -o $(WEB_OUT).js
	sed -i.old '1s;^;\/* eslint-disable *\/;' $(WEB_OUT).js
| 82 | + |
# Build every test program. Named as the default goal explicitly so that a
# bare `make` does not depend on which rule comes first in this file.
.DEFAULT_GOAL := all
all: $(PROGRAMS)

# Remove build products: listed objects, web outputs (including the .old copy
# left by `sed -i.old`), objects under test/ and web/, the generated version.h,
# editor backup files and the programs themselves.
clean:
	$(RM) $(SKETCH_OBJS) $(GPU_SKETCH_OBJS) $(CUDA_OBJS) $(WEB_OBJS) $(WEB_OUT).js.old \
	  *.o test/*.o web/*.o *.so version.h *~ ~* $(PROGRAMS)

# Install location; override with `make install BINDIR=/some/dir` or PREFIX=...
PREFIX ?= /usr/local
BINDIR ?= $(PREFIX)/bin

# Copy the programs into $(BINDIR), creating the directory if needed.
install: all
	install -d $(BINDIR)
	install $(PROGRAMS) $(BINDIR)
| 91 | + |
# CPU-only test programs: a plain host link of the sketch objects.
sketch_test: $(SKETCH_OBJS) test/main.o
	$(LINK.cpp) $(CUDA_LDFLAGS) $(LDFLAGS) $^ -o $@ $(LDLIBS)

matrix_test: $(SKETCH_OBJS) test/matrix_test.o
	$(LINK.cpp) $^ -o $@ $(LDLIBS)

# GPU test programs: nvcc first device-links the objects, then the host linker
# combines the objects with that device-link object. The device-link object is
# named after the target ($@_device_link.o) so that the two programs never
# write the same file when built in parallel.
read_test: $(SKETCH_OBJS) $(GPU_SKETCH_OBJS) $(CUDA_OBJS) test/read_test.o
	nvcc $(CUDAFLAGS) $(CUDA_LDFLAGS) -Wno-deprecated-gpu-targets -shared -dlink $^ -o $@_device_link.o -Xnvlink $(CUDA_LDLIBS)
	$(LINK.cpp) $(CUDA_LDFLAGS) $(LDFLAGS) $^ $@_device_link.o -o $@ $(CUDA_LDLIBS)

gpu_dist_test: $(SKETCH_OBJS) $(GPU_SKETCH_OBJS) $(CUDA_OBJS) test/gpu_dist_test.o
	nvcc $(CUDAFLAGS) $(CUDA_LDFLAGS) -Wno-deprecated-gpu-targets -shared -dlink $^ -o $@_device_link.o -Xnvlink $(CUDA_LDLIBS)
	$(LINK.cpp) $(CUDA_LDFLAGS) $(LDFLAGS) $^ $@_device_link.o -o $@ $(CUDA_LDLIBS)
| 105 | + |
# version.h defines SKETCH_VERSION as the SHA-1 of the concatenated sketch
# sources. Those sources are prerequisites so the header is regenerated when
# any of them changes. The recipe keeps the shell glob (not $^) so the
# concatenation order, and therefore the hash, stays as it was.
version.h: $(wildcard sketch/*.cpp sketch/*.hpp gpu/sketch.cu)
	cat sketch/*.cpp sketch/*.hpp gpu/sketch.cu | openssl sha1 | awk '{print "#define SKETCH_VERSION \"" $$2 "\""}' > $@

# version.h must exist (and be current) before these objects are compiled.
database/database.o: version.h

web/web_sketch.o: version.h
| 112 | + |
# Build the Python extension module.
python: $(PYTHON_LIB)

# Device-link the objects with nvcc, then link everything into the shared
# module. The device-link object has its own file name so this rule does not
# share an intermediate file with the GPU test programs.
$(PYTHON_LIB): $(SKETCH_OBJS) $(GPU_SKETCH_OBJS) $(CUDA_OBJS) sketchlib_bindings.o
	nvcc $(CUDAFLAGS) $(CUDA_LDFLAGS) -Wno-deprecated-gpu-targets -shared -dlink $^ -o python_device_link.o -Xnvlink $(CUDA_LDLIBS)
	$(LINK.cpp) $(CUDA_LDFLAGS) $(LDFLAGS) -shared $^ python_device_link.o -o $@ $(CUDA_LDLIBS)

# Copy the module into $(PYTHON_LIB_PATH), which the caller must provide,
# e.g. `make install_python PYTHON_LIB_PATH=/path/to/site-packages`.
install_python: python
	$(if $(strip $(PYTHON_LIB_PATH)),,$(error PYTHON_LIB_PATH must be set to the install directory))
	install -d $(PYTHON_LIB_PATH)
	install $(PYTHON_LIB) $(PYTHON_LIB_PATH)
| 122 | + |
# Compile each CUDA source gpu/<name>.cu into gpu/<name>.cu.o with nvcc.
# A static pattern rule over $(CUDA_OBJS) replaces the per-file copies, and
# naming the .cu file as a prerequisite means an object is rebuilt when its
# source changes. The nvcc command line is echoed by make, so the flags in
# use are visible in the build log (or with `make -n`).
$(CUDA_OBJS): gpu/%.cu.o: gpu/%.cu
	nvcc $(CUDAFLAGS) $(CPPFLAGS) -DGPU_AVAILABLE -x cu -c $< -o $@
| 141 | + |
# Targets that name commands rather than files.
.PHONY: all clean install
.PHONY: python install_python
.PHONY: web
0 commit comments