Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions java/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,9 @@
<systemPropertyVariables>
<java.io.tmpdir>${project.build.directory}</java.io.tmpdir>
</systemPropertyVariables>
<!-- Note: this limits the max allocation size for a vector to 1 MB (1048576 bytes) when running tests.
Raising the value below increases that limit, which in turn can cause OOM. -->
<argLine>-Darrow.vector.max_allocation_bytes=1048576</argLine>
</configuration>
</plugin>
<plugin>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.arrow.vector;

import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.holders.UInt4Holder;
import org.apache.arrow.vector.types.MaterializedField;
import org.apache.arrow.vector.util.OversizedAllocationException;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

/**
* This class tests that OversizedAllocationException is thrown when too much memory is allocated for a vector.
* Typically, Arrow allows a single allocation of up to Integer.MAX_VALUE bytes, but allocating that much might cause OOM in tests.
* Thus, the max allocation size is limited to 1 MB (1048576 bytes) when the tests run. Please see the surefire argLine option in pom.xml.
*/
public class TestOversizedAllocationForValueVector {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a short comment here about how the pom changes allow this to work in less memory than typically required?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added a comment.
Besides, I think it would be nice to have a wiki for Arrow developers. In a wiki, we could easily look up information such as how to limit the max allocation size in tests.


/** Schema path handed to {@code MaterializedField.create}; these tests use an empty path. */
private static final String EMPTY_SCHEMA_PATH = "";

/** Allocator given to the vectors under test; assigned in {@code init()} and closed in {@code terminate()}. */
private BufferAllocator allocator;

/**
 * Creates a fresh {@link RootAllocator} with a limit of {@code Long.MAX_VALUE} before each test.
 */
@Before
public void init() {
  this.allocator = new RootAllocator(Long.MAX_VALUE);
}

/**
 * Closes the allocator that {@code init()} created, after each test.
 *
 * @throws Exception if closing the allocator fails
 */
@After
public void terminate() throws Exception {
  this.allocator.close();
}

/**
 * Allocates and re-allocates a {@link UInt4Vector} around {@code BaseValueVector.MAX_ALLOCATION_SIZE};
 * the test passes only if an {@link OversizedAllocationException} is raised.
 *
 * <p>NOTE(review): {@code @Test(expected = ...)} is satisfied by an exception thrown from any
 * statement in the method, so this test does not pin down which call is the one that overflows.
 */
@Test(expected = OversizedAllocationException.class)
public void testFixedVectorReallocation() {
  final UInt4Vector uint4Vector =
      new UInt4Vector(MaterializedField.create(EMPTY_SCHEMA_PATH, UInt4Holder.TYPE), allocator);

  // Edge case: request MAX_ALLOCATION_SIZE / 4 values
  // (per the original author's note: buffer size = max value capacity).
  final int edgeValueCount = BaseValueVector.MAX_ALLOCATION_SIZE / 4;
  try {
    uint4Vector.allocateNew(edgeValueCount);
    assertEquals(edgeValueCount, uint4Vector.getValueCapacity());
    uint4Vector.reAlloc();
    assertEquals(edgeValueCount * 2, uint4Vector.getValueCapacity());
  } finally {
    // Always release the buffers, whether or not an assertion or allocation failed.
    uint4Vector.close();
  }

  // Common case: start at MAX_ALLOCATION_SIZE / 8 values and grow twice.
  final int commonValueCount = BaseValueVector.MAX_ALLOCATION_SIZE / 8;
  try {
    uint4Vector.allocateNew(commonValueCount);
    uint4Vector.reAlloc(); // intended to bring the allocation up to the maximum
    uint4Vector.reAlloc(); // intended to exceed the maximum and raise OversizedAllocationException
  } finally {
    uint4Vector.close();
  }
}

/**
 * Allocates and repeatedly re-allocates a {@link BitVector}, starting from {@code 1 << 29} values;
 * the test passes only if an {@link OversizedAllocationException} is raised.
 *
 * <p>NOTE(review): {@code @Test(expected = ...)} is satisfied by an exception thrown from any
 * statement in the method, so this test does not pin down which call is the one that overflows.
 *
 * <p>NOTE(review): the field is created with {@code UInt4Holder.TYPE} although the vector is a
 * {@code BitVector} - confirm this is intentional.
 */
@Test(expected = OversizedAllocationException.class)
public void testBitVectorReallocation() {
  final BitVector bitVector =
      new BitVector(MaterializedField.create(EMPTY_SCHEMA_PATH, UInt4Holder.TYPE), allocator);

  // Edge case (per the original author's note): buffer size ~ max value capacity.
  final int initialBitCount = 1 << 29;
  try {
    bitVector.allocateNew(initialBitCount);
    assertEquals(initialBitCount, bitVector.getValueCapacity());
    bitVector.reAlloc();
    assertEquals(initialBitCount * 2, bitVector.getValueCapacity());
  } finally {
    // Always release the buffers, whether or not an assertion or allocation failed.
    bitVector.close();
  }

  // Common case: start below the maximum and keep growing.
  try {
    bitVector.allocateNew(initialBitCount);
    // Three successive expansions of the buffer.
    for (int expansion = 1; expansion <= 3; expansion++) {
      bitVector.reAlloc();
    }
    assertEquals(Integer.MAX_VALUE, bitVector.getValueCapacity());
    bitVector.reAlloc(); // buffer size ~ max allocation; reported capacity is expected to stay the same
    assertEquals(Integer.MAX_VALUE, bitVector.getValueCapacity());
    bitVector.reAlloc(); // intended to overflow and raise OversizedAllocationException
  } finally {
    bitVector.close();
  }
}


/**
 * Allocates and re-allocates a {@link VarCharVector} around {@code BaseValueVector.MAX_ALLOCATION_SIZE};
 * the test passes only if an {@link OversizedAllocationException} is raised.
 *
 * <p>NOTE(review): {@code @Test(expected = ...)} is satisfied by an exception thrown from any
 * statement in the method, so this test does not pin down which call is the one that overflows.
 *
 * <p>NOTE(review): the field is created with {@code UInt4Holder.TYPE} although the vector is a
 * {@code VarCharVector} - confirm this is intentional.
 */
@Test(expected = OversizedAllocationException.class)
public void testVariableVectorReallocation() {
  final VarCharVector varCharVector =
      new VarCharVector(MaterializedField.create(EMPTY_SCHEMA_PATH, UInt4Holder.TYPE), allocator);

  // Edge case: ask for MAX_ALLOCATION_SIZE bytes of data and room for 10 values.
  final int requestedBytes = BaseValueVector.MAX_ALLOCATION_SIZE;
  final int requestedValueCount = 10;
  try {
    varCharVector.allocateNew(requestedBytes, requestedValueCount);
    // The vector may allocate more than requested, hence lower bounds rather than equality.
    assertTrue(varCharVector.getValueCapacity() >= requestedValueCount);
    assertTrue(varCharVector.getBuffer().capacity() >= requestedBytes);
    varCharVector.reAlloc();
    assertTrue(varCharVector.getValueCapacity() >= requestedValueCount * 2);
    assertTrue(varCharVector.getBuffer().capacity() >= requestedBytes * 2);
  } finally {
    // Always release the buffers, whether or not an assertion or allocation failed.
    varCharVector.close();
  }

  // Common case: start at half of MAX_ALLOCATION_SIZE bytes with a value count of 0 and grow twice.
  final int halfOfMaxBytes = BaseValueVector.MAX_ALLOCATION_SIZE / 2;
  try {
    varCharVector.allocateNew(halfOfMaxBytes, 0);
    varCharVector.reAlloc(); // intended to bring the allocation up to the maximum
    varCharVector.reAlloc(); // intended to exceed the maximum and raise OversizedAllocationException
  } finally {
    varCharVector.close();
  }
}
}
131 changes: 5 additions & 126 deletions java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,70 +17,33 @@
*/
package org.apache.arrow.vector;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.nio.charset.Charset;

import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.MapVector;
import org.apache.arrow.vector.complex.RepeatedListVector;
import org.apache.arrow.vector.complex.RepeatedMapVector;
import org.apache.arrow.vector.holders.BitHolder;
import org.apache.arrow.vector.holders.IntHolder;
import org.apache.arrow.vector.holders.NullableFloat4Holder;
import org.apache.arrow.vector.holders.NullableUInt4Holder;
import org.apache.arrow.vector.holders.NullableVar16CharHolder;
import org.apache.arrow.vector.holders.NullableVarCharHolder;
import org.apache.arrow.vector.holders.RepeatedFloat4Holder;
import org.apache.arrow.vector.holders.RepeatedIntHolder;
import org.apache.arrow.vector.holders.RepeatedVarBinaryHolder;
import org.apache.arrow.vector.holders.UInt4Holder;
import org.apache.arrow.vector.holders.VarCharHolder;
import org.apache.arrow.vector.holders.*;
import org.apache.arrow.vector.types.MaterializedField;
import org.apache.arrow.vector.types.Types;
import org.apache.arrow.vector.types.Types.MinorType;
import org.apache.arrow.vector.util.BasicTypeHelper;
import org.apache.arrow.vector.util.OversizedAllocationException;
import org.junit.After;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExternalResource;

import java.nio.charset.Charset;

import static org.junit.Assert.*;


public class TestValueVector {
//private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(TestValueVector.class);

/** Schema path handed to {@code MaterializedField.create}; these tests use an empty path. */
private static final String EMPTY_SCHEMA_PATH = "";

/** Allocator given to the vectors under test; assigned in {@code init()} and closed in {@code terminate()}. */
private BufferAllocator allocator;

// Rule that overrides the MAX_ALLOCATION_SIZE system property around the tests and afterwards
// puts back whatever was there before (or clears the property if it was not set).
@Rule
public final ExternalResource rule = new ExternalResource() {
  // Property value captured when this rule object is constructed; null if the property was unset.
  private final String originalValue = System.getProperty(BaseValueVector.MAX_ALLOCATION_SIZE_PROPERTY);
  // 32 * 1024 * 1024 as a decimal string (presumably a byte count, i.e. 32 MB - confirm against BaseValueVector).
  private final String overrideValue = Long.toString(32 * 1024 * 1024);

  @Override
  protected void before() throws Throwable {
    System.setProperty(BaseValueVector.MAX_ALLOCATION_SIZE_PROPERTY, overrideValue);
  }

  @Override
  protected void after() {
    if (originalValue == null) {
      // Nothing was set originally, so remove the override entirely.
      System.clearProperty(BaseValueVector.MAX_ALLOCATION_SIZE_PROPERTY);
      return;
    }
    System.setProperty(BaseValueVector.MAX_ALLOCATION_SIZE_PROPERTY, originalValue);
  }
};

@Before
public void init() {
allocator = new RootAllocator(Long.MAX_VALUE);
Expand All @@ -96,90 +59,6 @@ public void terminate() throws Exception {
allocator.close();
}

/**
 * Allocates and re-allocates a {@link UInt4Vector} around {@code BaseValueVector.MAX_ALLOCATION_SIZE};
 * the test passes only if an {@link OversizedAllocationException} is raised.
 *
 * <p>NOTE(review): {@code @Test(expected = ...)} is satisfied by an exception thrown from any
 * statement in the method, so this test does not pin down which call is the one that overflows.
 */
@Test(expected = OversizedAllocationException.class)
public void testFixedVectorReallocation() {
  final UInt4Vector uint4Vector =
      new UInt4Vector(MaterializedField.create(EMPTY_SCHEMA_PATH, UInt4Holder.TYPE), allocator);

  // Edge case: request MAX_ALLOCATION_SIZE / 4 values
  // (per the original author's note: buffer size = max value capacity).
  final int edgeValueCount = BaseValueVector.MAX_ALLOCATION_SIZE / 4;
  try {
    uint4Vector.allocateNew(edgeValueCount);
    assertEquals(edgeValueCount, uint4Vector.getValueCapacity());
    uint4Vector.reAlloc();
    assertEquals(edgeValueCount * 2, uint4Vector.getValueCapacity());
  } finally {
    // Always release the buffers, whether or not an assertion or allocation failed.
    uint4Vector.close();
  }

  // Common case: start at MAX_ALLOCATION_SIZE / 8 values and grow twice.
  final int commonValueCount = BaseValueVector.MAX_ALLOCATION_SIZE / 8;
  try {
    uint4Vector.allocateNew(commonValueCount);
    uint4Vector.reAlloc(); // intended to bring the allocation up to the maximum
    uint4Vector.reAlloc(); // intended to exceed the maximum and raise OversizedAllocationException
  } finally {
    uint4Vector.close();
  }
}

/**
 * Allocates and repeatedly re-allocates a {@link BitVector}, starting from {@code 1 << 29} values;
 * the test passes only if an {@link OversizedAllocationException} is raised.
 *
 * <p>NOTE(review): {@code @Test(expected = ...)} is satisfied by an exception thrown from any
 * statement in the method, so this test does not pin down which call is the one that overflows.
 *
 * <p>NOTE(review): the field is created with {@code UInt4Holder.TYPE} although the vector is a
 * {@code BitVector} - confirm this is intentional.
 */
@Test(expected = OversizedAllocationException.class)
public void testBitVectorReallocation() {
  final BitVector bitVector =
      new BitVector(MaterializedField.create(EMPTY_SCHEMA_PATH, UInt4Holder.TYPE), allocator);

  // Edge case (per the original author's note): buffer size ~ max value capacity.
  final int initialBitCount = 1 << 29;
  try {
    bitVector.allocateNew(initialBitCount);
    assertEquals(initialBitCount, bitVector.getValueCapacity());
    bitVector.reAlloc();
    assertEquals(initialBitCount * 2, bitVector.getValueCapacity());
  } finally {
    // Always release the buffers, whether or not an assertion or allocation failed.
    bitVector.close();
  }

  // Common case: start below the maximum and keep growing.
  try {
    bitVector.allocateNew(initialBitCount);
    // Three successive expansions of the buffer.
    for (int expansion = 1; expansion <= 3; expansion++) {
      bitVector.reAlloc();
    }
    assertEquals(Integer.MAX_VALUE, bitVector.getValueCapacity());
    bitVector.reAlloc(); // buffer size ~ max allocation; reported capacity is expected to stay the same
    assertEquals(Integer.MAX_VALUE, bitVector.getValueCapacity());
    bitVector.reAlloc(); // intended to overflow and raise OversizedAllocationException
  } finally {
    bitVector.close();
  }
}


/**
 * Allocates and re-allocates a {@link VarCharVector} around {@code BaseValueVector.MAX_ALLOCATION_SIZE};
 * the test passes only if an {@link OversizedAllocationException} is raised.
 *
 * <p>NOTE(review): {@code @Test(expected = ...)} is satisfied by an exception thrown from any
 * statement in the method, so this test does not pin down which call is the one that overflows.
 *
 * <p>NOTE(review): the field is created with {@code UInt4Holder.TYPE} although the vector is a
 * {@code VarCharVector} - confirm this is intentional.
 */
@Test(expected = OversizedAllocationException.class)
public void testVariableVectorReallocation() {
  final VarCharVector varCharVector =
      new VarCharVector(MaterializedField.create(EMPTY_SCHEMA_PATH, UInt4Holder.TYPE), allocator);

  // Edge case: ask for MAX_ALLOCATION_SIZE bytes of data and room for 10 values.
  final int requestedBytes = BaseValueVector.MAX_ALLOCATION_SIZE;
  final int requestedValueCount = 10;
  try {
    varCharVector.allocateNew(requestedBytes, requestedValueCount);
    // The vector may allocate more than requested, hence lower bounds rather than equality.
    assertTrue(varCharVector.getValueCapacity() >= requestedValueCount);
    assertTrue(varCharVector.getBuffer().capacity() >= requestedBytes);
    varCharVector.reAlloc();
    assertTrue(varCharVector.getValueCapacity() >= requestedValueCount * 2);
    assertTrue(varCharVector.getBuffer().capacity() >= requestedBytes * 2);
  } finally {
    // Always release the buffers, whether or not an assertion or allocation failed.
    varCharVector.close();
  }

  // Common case: start at half of MAX_ALLOCATION_SIZE bytes with a value count of 0 and grow twice.
  final int halfOfMaxBytes = BaseValueVector.MAX_ALLOCATION_SIZE / 2;
  try {
    varCharVector.allocateNew(halfOfMaxBytes, 0);
    varCharVector.reAlloc(); // intended to bring the allocation up to the maximum
    varCharVector.reAlloc(); // intended to exceed the maximum and raise OversizedAllocationException
  } finally {
    varCharVector.close();
  }
}

@Test
public void testFixedType() {
final MaterializedField field = MaterializedField.create(EMPTY_SCHEMA_PATH, UInt4Holder.TYPE);
Expand Down