Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dev/tasks/python-wheels/manylinux-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ pip install -q /arrow/python/$WHEEL_TAG/dist/*.whl
# Install test dependencies (pip won't work after removing system zlib)
pip install -q -r /arrow/python/requirements-test.txt
# Run pyarrow tests
pytest -v --pyargs pyarrow
pytest -rs --pyargs pyarrow

if [[ "$1" == "--remove-system-libs" ]]; then
# Run import tests after removing the bundled dependencies from the system
Expand Down
11 changes: 8 additions & 3 deletions dev/tasks/python-wheels/osx-build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -183,14 +183,19 @@ function install_wheel {
pip install $(pip_opts) \
$(python $multibuild_dir/supported_wheels.py $wheelhouse/*.whl)

# Install test dependencies
pip install $(pip_opts) -r python/requirements-test.txt
popd
}

function run_unit_tests {
pushd $1

# Install test dependencies
pip install $(pip_opts) -r python/requirements-test.txt

# Run pyarrow tests
py.test --pyargs pyarrow
pytest -rs --pyargs pyarrow
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this without a . in the name?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One is just an alias for the other.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we generally use the "pytest" spelling. At least that's the one I use locally (it's also consistent with "python -m pytest").

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pytest has been the preferred command for a couple of releases now; the documentation also refers to it: https://docs.pytest.org/en/latest/usage.html

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

py.test was once the preferred way, but judging from the documentation, this is no longer the case.


popd
}

function run_import_tests {
Expand Down
2 changes: 1 addition & 1 deletion dev/tasks/python-wheels/travis.osx.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ install:
# install the built wheel and test dependencies
- install_wheel arrow
# run unit tests before removing the system libraries
- run_unit_tests
- run_unit_tests arrow
# remove libz to ensure that it is properly bundled
- sudo find /usr -name libz.* -delete
# run the import tests
Expand Down
4 changes: 2 additions & 2 deletions dev/tasks/python-wheels/win-build.bat
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ set ARROW_TEST_DATA=%ARROW_SRC%\testing\data
@rem test the wheel
@rem TODO For maximum reliability, we should test in a plain virtualenv instead.
call conda create -n wheel-test -q -y python=%PYTHON_VERSION% ^
numpy=%NUMPY_VERSION% pandas pytest hypothesis || exit /B
numpy=%NUMPY_VERSION% pandas cython pytest hypothesis || exit /B
call activate wheel-test

@rem install the built wheel
Expand All @@ -90,4 +90,4 @@ pip install -vv --no-index --find-links=%ARROW_SRC%\python\dist\ pyarrow || exit
python -c "import pyarrow; import pyarrow.parquet; import pyarrow.flight; import pyarrow.gandiva;" || exit /B

@rem run the python tests
pytest --pyargs pyarrow || exit /B
pytest -rs --pyargs pyarrow || exit /B
16 changes: 8 additions & 8 deletions dev/tasks/tasks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ tasks:
unicode_width: 32
wheel_tag: manylinux1
test_docker_images:
- python:2.7-slim # debian ucs4
- python:2.7 # debian ucs4
test_remove_system_libs: false
artifacts:
- pyarrow-{no_rc_version}-cp27-cp27mu-manylinux1_x86_64.whl
Expand All @@ -194,7 +194,7 @@ tasks:
unicode_width: 16
wheel_tag: manylinux1
test_docker_images:
- python:3.5-slim
- python:3.5
test_remove_system_libs: true
artifacts:
- pyarrow-{no_rc_version}-cp35-cp35m-manylinux1_x86_64.whl
Expand All @@ -208,7 +208,7 @@ tasks:
unicode_width: 16
wheel_tag: manylinux1
test_docker_images:
- python:3.6-slim
- python:3.6
test_remove_system_libs: true
artifacts:
- pyarrow-{no_rc_version}-cp36-cp36m-manylinux1_x86_64.whl
Expand All @@ -222,7 +222,7 @@ tasks:
unicode_width: 16
wheel_tag: manylinux1
test_docker_images:
- python:3.7-slim
- python:3.7
test_remove_system_libs: true
artifacts:
- pyarrow-{no_rc_version}-cp37-cp37m-manylinux1_x86_64.whl
Expand All @@ -249,7 +249,7 @@ tasks:
unicode_width: 32
wheel_tag: manylinux2010
test_docker_images:
- python:2.7-slim # debian ucs4
- python:2.7 # debian ucs4
test_remove_system_libs: false
artifacts:
- pyarrow-{no_rc_version}-cp27-cp27mu-manylinux2010_x86_64.whl
Expand All @@ -263,7 +263,7 @@ tasks:
unicode_width: 16
wheel_tag: manylinux2010
test_docker_images:
- python:3.5-slim
- python:3.5
test_remove_system_libs: true
artifacts:
- pyarrow-{no_rc_version}-cp35-cp35m-manylinux2010_x86_64.whl
Expand All @@ -277,7 +277,7 @@ tasks:
unicode_width: 16
wheel_tag: manylinux2010
test_docker_images:
- python:3.6-slim
- python:3.6
test_remove_system_libs: true
artifacts:
- pyarrow-{no_rc_version}-cp36-cp36m-manylinux2010_x86_64.whl
Expand All @@ -291,7 +291,7 @@ tasks:
unicode_width: 16
wheel_tag: manylinux2010
test_docker_images:
- python:3.7-slim
- python:3.7
test_remove_system_libs: true
artifacts:
- pyarrow-{no_rc_version}-cp37-cp37m-manylinux2010_x86_64.whl
Expand Down
76 changes: 56 additions & 20 deletions python/pyarrow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,43 @@ def get_include():
return _os.path.join(_os.path.dirname(__file__), 'include')


def _get_pkg_config_executable():
    """
    Return the pkg-config executable to invoke.

    The value of the ``PKG_CONFIG`` environment variable is used when it is
    set to a non-empty string; otherwise the plain ``pkg-config`` command
    name is returned.
    """
    # Use `or` instead of a .get() default so that a PKG_CONFIG variable that
    # is set but empty also falls back to the default command name.
    return _os.environ.get('PKG_CONFIG') or 'pkg-config'


def _has_pkg_config(pkgname):
    """
    Tell whether pkg-config reports that the package *pkgname* exists.

    Returns True when ``<pkg-config> --exists <pkgname>`` exits with status
    0, and False when it exits with another status or when launching the
    command raises OSError.
    """
    import subprocess
    try:
        command = [_get_pkg_config_executable(), '--exists', pkgname]
        status = subprocess.call(command)
    except OSError:
        # TODO: replace with FileNotFoundError once we ditch 2.7
        return False
    else:
        return status == 0


def _read_pkg_config_variable(pkgname, cli_args):
    """
    Run pkg-config on *pkgname* with the extra arguments *cli_args* (a list)
    and return what it printed on standard output.

    The output is decoded as UTF-8 after trailing whitespace is stripped.
    RuntimeError is raised, carrying the decoded standard error output,
    when the command exits with a non-zero status.
    """
    from subprocess import PIPE, Popen

    argv = [_get_pkg_config_executable(), pkgname] + cli_args
    process = Popen(argv, stdout=PIPE, stderr=PIPE)
    stdout_data, stderr_data = process.communicate()
    if process.returncode == 0:
        return stdout_data.rstrip().decode('utf8')
    raise RuntimeError("pkg-config failed: " + stderr_data.decode('utf8'))


def get_so_version():
    """
    Return the SO version for Arrow libraries.

    The value is read from the ``so_version`` pkg-config variable of the
    "arrow" package when pkg-config knows about that package; otherwise a
    hardcoded fallback is returned.

    Raises NotImplementedError on Windows.
    """
    if _sys.platform == 'win32':
        raise NotImplementedError("Cannot get SO version on Windows")
    if not _has_pkg_config("arrow"):
        # XXX Find a way not to hardcode this?
        return "100"
    return _read_pkg_config_variable("arrow", ["--variable=so_version"])


def get_libraries():
"""
Return list of library names to include in the `libraries` argument for C
Expand All @@ -223,38 +260,37 @@ def get_library_dirs():
linking C or Cython extensions using pyarrow
"""
package_cwd = _os.path.dirname(__file__)

library_dirs = [package_cwd]

def append_library_dir(library_dir):
if library_dir not in library_dirs:
library_dirs.append(library_dir)

# Search library paths via pkg-config. This is necessary if the user
# installed libarrow and the other shared libraries manually and they
# are not shipped inside the pyarrow package (see also ARROW-2976).
from subprocess import call, PIPE, Popen
pkg_config_executable = _os.environ.get('PKG_CONFIG', None) or 'pkg-config'
for package in ["arrow", "plasma", "arrow_python"]:
cmd = '{0} --exists {1}'.format(pkg_config_executable, package).split()
try:
if call(cmd) == 0:
cmd = [pkg_config_executable, "--libs-only-L", package]
proc = Popen(cmd, stdout=PIPE, stderr=PIPE)
out, err = proc.communicate()
library_dir = out.rstrip().decode('utf-8')[2:] # strip "-L"
if library_dir not in library_dirs:
library_dirs.append(library_dir)
except FileNotFoundError:
pass
pkg_config_executable = _os.environ.get('PKG_CONFIG') or 'pkg-config'
for pkgname in ["arrow", "arrow_python"]:
if _has_pkg_config(pkgname):
library_dir = _read_pkg_config_variable(pkgname,
["--libs-only-L"])
assert library_dir.startswith("-L")
append_library_dir(library_dir[2:])

if _sys.platform == 'win32':
# TODO(wesm): Is this necessary, or does setuptools within a conda
# installation add Library\lib to the linker path for MSVC?
python_base_install = _os.path.dirname(_sys.executable)
library_lib = _os.path.join(python_base_install, 'Library', 'lib')
library_dir = _os.path.join(python_base_install, 'Library', 'lib')

if _os.path.exists(_os.path.join(library_lib, 'arrow.lib')):
library_dirs.append(library_lib)
if _os.path.exists(_os.path.join(library_dir, 'arrow.lib')):
append_library_dir(library_dir)

# ARROW-4074: Allow for ARROW_HOME to be set to some other directory
if 'ARROW_HOME' in _os.environ:
library_dirs.append(_os.path.join(_os.environ['ARROW_HOME'], 'lib'))
if _os.environ.get('ARROW_HOME'):
append_library_dir(_os.path.join(_os.environ['ARROW_HOME'], 'lib'))
else:
# Python wheels bundle the Arrow libraries in the pyarrow directory.
append_library_dir(_os.path.dirname(_os.path.abspath(__file__)))

return library_dirs
3 changes: 3 additions & 0 deletions python/pyarrow/includes/libarrow.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,9 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
cdef cppclass CFixedWidthType" arrow::FixedWidthType"(CDataType):
int bit_width()

cdef cppclass CNullArray" arrow::NullArray"(CArray):
CNullArray(int64_t length)

cdef cppclass CDictionaryArray" arrow::DictionaryArray"(CArray):
CDictionaryArray(const shared_ptr[CDataType]& type,
const shared_ptr[CArray]& indices,
Expand Down
4 changes: 2 additions & 2 deletions python/pyarrow/ipc.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -536,7 +536,7 @@ def read_schema(obj, DictionaryMemo dictionary_memo=None):
get_reader(obj, True, &cpp_file)

if dictionary_memo is not None:
arg_dict_memo = &dictionary_memo.memo
arg_dict_memo = dictionary_memo.memo
else:
arg_dict_memo = &temp_memo

Expand Down Expand Up @@ -575,7 +575,7 @@ def read_record_batch(obj, Schema schema,
message = read_message(obj)

if dictionary_memo is not None:
arg_dict_memo = &dictionary_memo.memo
arg_dict_memo = dictionary_memo.memo
else:
arg_dict_memo = &temp_memo

Expand Down
5 changes: 4 additions & 1 deletion python/pyarrow/lib.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,10 @@ cdef class StructType(DataType):

cdef class DictionaryMemo:
cdef:
CDictionaryMemo memo
# Even though the CDictionaryMemo instance is private, we allocate
# it on the heap so as to avoid C++ ABI issues with Python wheels.
shared_ptr[CDictionaryMemo] sp_memo
CDictionaryMemo* memo


cdef class DictionaryType(DataType):
Expand Down
12 changes: 8 additions & 4 deletions python/pyarrow/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@


groups = [
'cython',
'hypothesis',
'gandiva',
'hdfs',
Expand All @@ -53,6 +54,7 @@


defaults = {
'cython': False,
'hypothesis': False,
'gandiva': False,
'hdfs': False,
Expand All @@ -66,6 +68,12 @@
'flight': False,
}

try:
import cython # noqa
defaults['cython'] = True
except ImportError:
pass

try:
import pyarrow.gandiva # noqa
defaults['gandiva'] = True
Expand All @@ -78,14 +86,12 @@
except ImportError:
pass


try:
import pandas # noqa
defaults['pandas'] = True
except ImportError:
pass


try:
import pyarrow.parquet # noqa
defaults['parquet'] = True
Expand All @@ -98,14 +104,12 @@
except ImportError:
pass


try:
import tensorflow # noqa
defaults['tensorflow'] = True
except ImportError:
pass


try:
import pyarrow.flight # noqa
defaults['flight'] = True
Expand Down
10 changes: 9 additions & 1 deletion python/pyarrow/tests/pyarrow_cython_example.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,17 @@ from pyarrow.lib cimport *


def get_array_length(obj):
    # Example combining the pyarrow Cython API (unwrapping a Python object
    # into a C++ array pointer) with the Arrow C++ API (asking the array
    # for its length).
    cdef shared_ptr[CArray] sp_array
    sp_array = pyarrow_unwrap_array(obj)
    # A null pointer after unwrapping is reported as a TypeError.
    if sp_array.get() == NULL:
        raise TypeError("not an array")
    return sp_array.get().length()


def make_null_array(length):
    # Example that constructs an Arrow C++ NullArray of the requested length
    # and returns it wrapped as a PyArrow object, without PyArrow being
    # imported explicitly at the Python level.
    cdef shared_ptr[CArray] sp_array
    sp_array.reset(new CNullArray(length))
    return pyarrow_wrap_array(sp_array)
Loading