Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,18 @@ protected BaseAccessor() { }
public boolean isNull(int index) {
// Reports every index as non-null; the index argument is not consulted.
return false;
}

/**
 * Counts the null values by testing every index with {@link #isNull(int)}.
 *
 * <p>This is a linear scan over all values; override this in case your implementation
 * is faster, see BitVector.
 *
 * @return the number of indices in {@code [0, getValueCount())} for which
 *         {@code isNull} returns true
 */
@Override
public int getNullCount() {
  // Read the loop bound once instead of calling getValueCount() on every iteration.
  final int total = getValueCount();
  int nullCount = 0;
  for (int i = 0; i < total; i++) {
    if (isNull(i)) {
      nullCount++;
    }
  }
  return nullCount;
}
}

public abstract static class BaseMutator implements ValueVector.Mutator {
Expand Down
22 changes: 22 additions & 0 deletions java/vector/src/main/java/org/apache/arrow/vector/BitVector.java
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,28 @@ public final void get(int index, NullableBitHolder holder) {
holder.isSet = 1;
holder.value = get(index);
}

/**
 * Gets the number of nulls, which corresponds to the number of bits set to 0 among the
 * first {@code valueCount} bits of the vector.
 *
 * <p>When {@code valueCount} is not a multiple of 8, the unused bits of the last byte are
 * excluded from the count regardless of their content.
 *
 * @return the number of bits set to 0
 */
@Override
public final int getNullCount() {
  final int sizeInBytes = getSizeFromCount(valueCount);
  final int remainder = valueCount % 8;
  // Number of bits in the last byte that lie beyond valueCount (0 when the last byte is full).
  final int paddingBits = remainder == 0 ? 0 : 8 - remainder;
  // Bits are numbered from the least-significant bit of each byte (Arrow bitmap layout),
  // so the padding occupies the high-order bits of the last byte; this mask drops them.
  final int lastByteMask = 0xFF >>> paddingBits;

  int setBits = 0;
  for (int i = 0; i < sizeInBytes; ++i) {
    // A byte is sign-extended when widened to int; masking with at most 0xFF keeps only
    // its own 8 bits so that a negative byte does not contribute 24 extra set bits.
    final int mask = (i == sizeInBytes - 1) ? lastByteMask : 0xFF;
    setBits += Integer.bitCount(data.getByte(i) & mask);
  }

  // Total bits scanned, minus the padding that is not part of the vector, minus the set bits.
  return (sizeInBytes * 8) - paddingBits - setBits;
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,11 @@ interface Accessor {
* Returns true if the value at the given index is null, false otherwise.
*/
boolean isNull(int index);

/**
 * Returns the number of null values.
 *
 * @return the count of null values
 */
int getNullCount();
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,15 +60,7 @@ public ArrowRecordBatch getRecordBatch() {

private void appendNodes(FieldVector vector, List<ArrowFieldNode> nodes, List<ArrowBuf> buffers) {
Accessor accessor = vector.getAccessor();
int nullCount = 0;
// TODO: should not have to do that
// we can do that a lot more efficiently (for example with Long.bitCount(i))
for (int i = 0; i < accessor.getValueCount(); i++) {
if (accessor.isNull(i)) {
nullCount ++;
}
}
nodes.add(new ArrowFieldNode(accessor.getValueCount(), nullCount));
nodes.add(new ArrowFieldNode(accessor.getValueCount(), accessor.getNullCount()));
List<ArrowBuf> fieldBuffers = vector.getFieldBuffers();
List<ArrowVectorType> expectedBuffers = vector.getField().getTypeLayout().getVectorTypes();
if (fieldBuffers.size() != expectedBuffers.size()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,11 @@ public int getValueCount() {
public boolean isNull(int index) {
// Reports every index as null; the index argument is not consulted.
return true;
}

/**
 * Returns the number of null values, which is always 0 for this accessor.
 *
 * <p>NOTE(review): isNull in this accessor returns true for every index, so a count of 0 is
 * only consistent if the value count is 0 - confirm against getValueCount.
 *
 * @return always 0
 */
@Override
public int getNullCount() {
return 0;
}
};

private final Mutator defaultMutator = new Mutator() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,11 @@ public Object getObject(int index) {
public boolean isNull(int index) {
// A value is null when the entry read from the `bits` accessor at this index is 0.
return bits.getAccessor().get(index) == 0;
}

/**
 * Returns the number of null values, as reported by the accessor of {@code bits}.
 *
 * @return the null count computed by the {@code bits} accessor
 */
@Override
public int getNullCount() {
return bits.getAccessor().getNullCount();
}
}

public class Mutator extends BaseRepeatedMutator {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,11 @@ public void get(int index, ComplexHolder holder) {
super.get(index, holder);
}

/**
 * Returns the number of null values, as reported by the accessor of {@code bits}.
 *
 * @return the null count computed by the {@code bits} accessor
 */
@Override
public int getNullCount() {
return bits.getAccessor().getNullCount();
}

@Override
public boolean isNull(int index) {
return isSet(index) == 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -288,19 +288,24 @@ public void testBitVector() {
try (final BitVector vector = new BitVector(EMPTY_SCHEMA_PATH, allocator)) {
final BitVector.Mutator m = vector.getMutator();
vector.allocateNew(1024);
m.setValueCount(1024);

// Put and set a few values
m.set(0, 1);
m.set(1, 0);
m.set(100, 0);
m.set(1022, 1);

m.setValueCount(1024);

final BitVector.Accessor accessor = vector.getAccessor();
assertEquals(1, accessor.get(0));
assertEquals(0, accessor.get(1));
assertEquals(0, accessor.get(100));
assertEquals(1, accessor.get(1022));

assertEquals(1022, accessor.getNullCount());

// test setting the same value twice
m.set(0, 1);
m.set(0, 1);
Expand All @@ -315,8 +320,47 @@ public void testBitVector() {
assertEquals(0, accessor.get(0));
assertEquals(1, accessor.get(1));

// should not change
assertEquals(1022, accessor.getNullCount());

// Ensure unallocated space returns 0
assertEquals(0, accessor.get(3));

// unset the previously set bits
m.set(1, 0);
m.set(1022, 0);
// this should set all the array to 0
assertEquals(1024, accessor.getNullCount());

// set all the array to 1
for (int i = 0; i < 1024; ++i) {
assertEquals(1024 - i, accessor.getNullCount());
m.set(i, 1);
}

assertEquals(0, accessor.getNullCount());

vector.allocateNew(1015);
m.setValueCount(1015);

// ensure it has been zeroed
assertEquals(1015, accessor.getNullCount());

m.set(0, 1);
m.set(1014, 1); // ensure that the last item of the last byte is allocated

assertEquals(1013, accessor.getNullCount());

vector.zeroVector();
assertEquals(1015, accessor.getNullCount());

// set all the array to 1
for (int i = 0; i < 1015; ++i) {
assertEquals(1015 - i, accessor.getNullCount());
m.set(i, 1);
}

assertEquals(0, accessor.getNullCount());
}
}

Expand Down