diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index 45601d0199..3c33bfa942 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -17,8 +17,8 @@ defaults: shell: bash -l {0} concurrency: - group: ${{ github.ref }} - cancel-in-progress: true + group: ${{ github.workflow }}-${{ github.ref || github.run_id }} + cancel-in-progress: ${{ github.event_name == 'pull_request' }} jobs: main: @@ -30,8 +30,8 @@ jobs: fail-fast: false max-parallel: 12 matrix: - os: [ubuntu-latest, windows-latest, macOS-11] - python-version: [3.6, 3.7, 3.8, 3.9] + os: [ubuntu-latest, windows-latest, macOS-12] + python-version: [3.8, 3.9, "3.10", "3.11", "3.12"] steps: - name: disk space @@ -40,9 +40,9 @@ jobs: df -h - uses: maxim-lobanov/setup-xcode@v1 # alternative would be to upgrade tapi to 1100.0.11, we can possibly remove this in the future - if: matrix.os == 'macOS-11' + if: matrix.os == 'macOS-12' with: - xcode-version: "11.7.0" + xcode-version: "13.2.1" - uses: actions/checkout@v2 with: submodules: true @@ -53,11 +53,12 @@ jobs: token: ${{ secrets.PAT_PULL_ENTERPRISE }} path: vaex-enterprise - name: install micromamba - uses: mamba-org/provision-with-micromamba@main + uses: mamba-org/setup-micromamba@v1 with: + micromamba-version: 1.5.8-0 environment-name: vaex-dev environment-file: ci/conda-env.yml - extra-specs: | + create-args: >- python=${{ matrix.python-version }} - name: Extra non-windows installs if: matrix.os != 'windows-latest' @@ -79,12 +80,12 @@ jobs: # docker rmi $(docker image ls -aq) # df -h - name: Install OpenMP runtime (Mac-only) - if: matrix.os == 'macOS-11' + if: matrix.os == 'macOS-12' run: | brew install libomp - name: Cache compiled binaries # this fails for this combination, leading to binaries filled with 0's - if: matrix.python-version != '3.6' || matrix.os != 'macOS-11' + if: matrix.os != 'macOS-12' id: cache-compiled-binaries uses: actions/cache@v2 with: @@ -137,11 +138,6 @@ jobs: # # flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics # # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide # # flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - name: Install fixes for Python 3.6 - if: matrix.python-version == '3.6' - run: | - # by default on 3.6 we get an old version, so manually upgrade - pip install gcsfs==0.8.0 - name: Install pytest-asyncio correct version if: matrix.python-version != '3.10' run: | @@ -155,8 +151,8 @@ jobs: run: | VAEX_CACHE_RESULTS=1 ./ci/04-run-test-suite.sh - name: Check ml spec - # no catboost for py39 and py37 - if: matrix.python-version != '3.9' && matrix.os != 'windows-latest' && matrix.python-version != '3.7' + # no catboost for py39 and py37, xgboost fails on 3.6 + if: matrix.python-version != '3.9' && matrix.os != 'windows-latest' run: | python -m vaex.ml.spec packages/vaex-ml/vaex/ml/spec_new.json diff packages/vaex-ml/vaex/ml/spec_new.json packages/vaex-ml/vaex/ml/spec.json @@ -165,19 +161,19 @@ jobs: # run: | # py.test vaex-enterprise/tests --timeout=1000 - name: Test notebooks - if: matrix.os != 'windows-latest' && matrix.python-version != '3.6' + if: matrix.os != 'windows-latest' run: | ./ci/05-run-notebooks.sh - name: Authenticate Google Cloud Platform - if: ${{ (github.event.pull_request.head.repo.full_name == 'vaexio/vaex') && !((matrix.os == 'windows-latest') || (matrix.os == 'macOS-11' && matrix.python-version == '3.6')) }} + if: ${{ (github.event.pull_request.head.repo.full_name == 'vaexio/vaex') && !((matrix.os == 'windows-latest')) }} uses: google-github-actions/setup-gcloud@v0 with: project_id: ${{ secrets.GCP_PROJECT_ID_VAEX }} service_account_key: ${{ secrets.GCP_SA_KEY_VAEX }} export_default_credentials: true - name: Test vaex-contrib - # do not run in a PR from someone else, skip windows, and osx+py36 - if: ${{ (github.event.pull_request.head.repo.full_name == 'vaexio/vaex') && !((matrix.os == 'windows-latest') || (matrix.os == 'macOS-11' && matrix.python-version == '3.6')) }} + # do not run in a PR from someone else, skip windows + if: ${{ (github.event.pull_request.head.repo.full_name == 'vaexio/vaex') && !((matrix.os == 'windows-latest')) }} env: PROJECT_ID: ${{ secrets.GCP_PROJECT_ID_VAEX }} run: | @@ -191,12 +187,8 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest, windows-latest, macOS-11] - python-version: [3.7, 3.8, 3.9] - # ssl/certifi issues with this combination - exclude: - - os: windows-latest - python-version: 3.7 + os: [ubuntu-latest, windows-latest, macOS-12] + python-version: [3.8, 3.9] steps: - uses: actions/checkout@v2 @@ -206,11 +198,12 @@ jobs: path: ./dist - name: Install micromamba - uses: mamba-org/provision-with-micromamba@main + uses: mamba-org/setup-micromamba@v1 with: + micromamba-version: 1.5.6-0 environment-name: vaex-test environment-file: ci/conda-base-minimal.yml - extra-specs: | + create-args: >- python=${{ matrix.python-version }} pcre rich diff --git a/.github/workflows/wheel-universal.yml b/.github/workflows/wheel-universal.yml index fe2fd092a7..3be4f23408 100644 --- a/.github/workflows/wheel-universal.yml +++ b/.github/workflows/wheel-universal.yml @@ -2,16 +2,17 @@ name: Build universal wheel on: push: - tags: - - meta-* - - astro-* - - graphql-* - - jupyter-* - - ml-* - - viz-* - - hdf5-* - - server-* - - contrib-* + branches: + - master + pull_request: + branches: + - master + schedule: + - cron: "0 2 * * 1-5" + +concurrency: + group: ${{ github.workflow }}-${{ github.ref || github.run_id }} + cancel-in-progress: ${{ github.event_name == 'pull_request' }} jobs: build_wheels: @@ -41,7 +42,6 @@ jobs: mkdir dist - name: Build vaex (meta) - if: startsWith(github.ref, 'refs/tags/meta') run: | (cp README.md packages/vaex-meta/ && cd packages/vaex-meta && python setup.py sdist bdist_wheel); cp packages/vaex-meta/dist/* dist @@ -54,7 +54,6 @@ jobs: twine upload dist/vaex* dist/vaex* - name: Build vaex-graphql - if: startsWith(github.ref, 'refs/tags/graphql') run: | (cd packages/vaex-graphql && python setup.py sdist bdist_wheel); cp packages/vaex-graphql/dist/* dist @@ -68,7 +67,6 @@ jobs: - name: Build vaex-jupyter - if: startsWith(github.ref, 'refs/tags/jupyter') run: | (cd packages/vaex-jupyter && python setup.py sdist bdist_wheel); cp packages/vaex-jupyter/dist/* dist @@ -81,7 +79,6 @@ jobs: twine upload dist/vaex-jupyter* dist/vaex_jupyter* - name: Build vaex-ml - if: startsWith(github.ref, 'refs/tags/ml') run: | (cd packages/vaex-ml && python setup.py sdist bdist_wheel); cp packages/vaex-ml/dist/* dist @@ -94,7 +91,6 @@ jobs: twine upload dist/vaex-ml* dist/vaex_ml* - name: Build vaex-contrib - if: startsWith(github.ref, 'refs/tags/contrib') run: | (cd packages/vaex-contrib && python setup.py sdist bdist_wheel); cp packages/vaex-contrib/dist/* dist @@ -109,7 +105,6 @@ jobs: - name: Build vaex-viz - if: startsWith(github.ref, 'refs/tags/viz') run: | (cd packages/vaex-viz && python setup.py sdist bdist_wheel); cp packages/vaex-viz/dist/* dist @@ -122,7 +117,6 @@ jobs: twine upload dist/vaex-viz* dist/vaex_viz* - name: Build vaex-astro - if: startsWith(github.ref, 'refs/tags/astro') run: | (cd packages/vaex-astro && python setup.py sdist bdist_wheel); cp packages/vaex-astro/dist/* dist @@ -135,7 +129,6 @@ jobs: twine upload dist/vaex-astro* dist/vaex_astro* - name: Build vaex-hdf5 - if: startsWith(github.ref, 'refs/tags/hdf5') run: | (cd packages/vaex-hdf5 && python setup.py sdist bdist_wheel); cp packages/vaex-hdf5/dist/* dist @@ -148,7 +141,6 @@ jobs: twine upload dist/vaex-hdf5* dist/vaex_hdf5* - name: Build vaex-server - if: startsWith(github.ref, 'refs/tags/server') run: | (cd packages/vaex-server && python setup.py sdist bdist_wheel); cp packages/vaex-server/dist/* dist diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml index 8d27f3e9f2..027c9f90b1 100644 --- a/.github/workflows/wheel.yml +++ b/.github/workflows/wheel.yml @@ -2,88 +2,133 @@ name: Build binary wheel on: push: - tags: - - core-* + branches: + - master + pull_request: + branches: + - master + schedule: + - cron: "0 2 * * 1-5" + release: + types: [released, prereleased] + workflow_dispatch: # allows running workflow manually from the Actions tab + +concurrency: + group: ${{ github.workflow }}-${{ github.ref || github.run_id }} + cancel-in-progress: ${{ github.event_name == 'pull_request' }} jobs: + build_wheels_matrix: + runs-on: ubuntu-latest + outputs: + include: ${{ steps.set-matrix.outputs.include }} + + steps: + - uses: actions/checkout@v4 + with: + submodules: true + + - uses: actions/setup-python@v5 + with: + python-version: "3.x" + + - run: pip install cibuildwheel==2.17.0 # sync version with pypa/cibuildwheel below + + - id: set-matrix + env: + CIBW_SKIP: pp* + run: | + MATRIX_INCLUDE=$( + { + cibuildwheel --print-build-identifiers --platform linux --arch x86_64,aarch64 | jq -nRc '{"only": inputs, "os": "ubuntu-latest"}' \ + && cibuildwheel --print-build-identifiers --platform macos --arch x86_64 | jq -nRc '{"only": inputs, "os": "macos-13"}' \ + && cibuildwheel --print-build-identifiers --platform macos --arch arm64 | jq -nRc '{"only": inputs, "os": "macos-14"}' \ + && cibuildwheel --print-build-identifiers --platform windows --arch AMD64 | jq -nRc '{"only": inputs, "os": "windows-latest"}' + } | jq -sc + ) + echo "include=$MATRIX_INCLUDE" >> $GITHUB_OUTPUT + cat $GITHUB_OUTPUT + working-directory: packages/vaex-core/ + build_wheels: - name: Build wheel on ${{ matrix.os }} + needs: build_wheels_matrix runs-on: ${{ matrix.os }} + name: Build ${{ matrix.only }} + strategy: fail-fast: false matrix: - os: [ubuntu-20.04, macOS-11, windows-latest] - python-version: [36, 37, 38, 39, 310] + include: ${{ fromJson(needs.build_wheels_matrix.outputs.include) }} + + permissions: + contents: write # for uploading release assets steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v4 with: submodules: true - name: Copy dll - if: (matrix.os == 'windows-latest') + if: matrix.os == 'windows-latest' uses: ./ci/actions/windll - - uses: actions/setup-python@v2 - name: Install Python - with: - python-version: '3.7' - - - name: Install cibuildwheel - run: | - python -m pip install cibuildwheel==2.8.1 - - - name: Install twine/wheel - run: | - python -m pip install twine wheel - - name: chores if: matrix.os != 'windows-latest' run: | - mkdir dist mkdir packages/vaex-core/bin cp bin/install_pcre.sh packages/vaex-core/bin/ - - name: Build vaex-core (windows) - if: (matrix.os == 'windows-latest') - env: - CIBW_BUILD: cp${{ matrix.python-version }}*64 - CIBW_ENVIRONMENT_LINUX: "CFLAGS='-Wl,-strip-all' CXXFLAGS='-Wl,-strip-all'" - run: | - python -m cibuildwheel --output-dir dist packages/vaex-core + - name: Set up QEMU + if: runner.os == 'Linux' + uses: docker/setup-qemu-action@v3 - - name: Build vaex-core (linux, osx) - if: (matrix.os != 'windows-latest') + - uses: pypa/cibuildwheel@v2.17.0 # sync version with pip install cibuildwheel above + with: + only: ${{ matrix.only }} + package-dir: packages/vaex-core/ + output-dir: packages/vaex-core/dist/ env: - CIBW_BEFORE_BUILD: bin/install_pcre.sh - CIBW_BUILD: "cp${{ matrix.python-version }}*64 cp${{ matrix.python-version }}-macosx-*" - CIBW_ENVIRONMENT_LINUX: "CFLAGS='-Wl,-strip-all' CXXFLAGS='-Wl,-strip-all'" + CIBW_BEFORE_BUILD: ${{ startswith(matrix.os, 'ubuntu') && 'bash bin/install_pcre.sh' || startswith(matrix.os, 'macos') && 'sudo -E bash bin/install_pcre.sh' || '' }} CIBW_BUILD_VERBOSITY: 2 - # needed for m1 builds - CIBW_ENVIRONMENT_MACOS: "CFLAGS='-I/usr/local/include -L/usr/local/lib' CXXFLAGS='-I/usr/local/include -L/usr/local/lib' LDFLAGS='-L/usr/local/lib'" - CIBW_ARCHS_MACOS: "x86_64 arm64" - run: | - python -m cibuildwheel --output-dir dist packages/vaex-core + # cargo needed because of missing blake3 wheels (aarch64 and musllinux missing) + # cargo added to path in CIBW_ENVIRONMENT_LINUX below + CIBW_BEFORE_TEST: ${{ startswith(matrix.os, 'ubuntu') && 'curl https://sh.rustup.rs -sSf | sh -s -- -y' || '' }} + # no test on musllinux due to missing pyarrow wheels ref https://github.com/apache/arrow/pull/40177 + CIBW_TEST_SKIP: '*musllinux*' + CIBW_TEST_COMMAND: python -c "import vaex; print(vaex.from_arrays(x=[1,2]))" + CIBW_MANYLINUX_X86_64_IMAGE: manylinux_2_28 + CIBW_MANYLINUX_AARCH64_IMAGE: manylinux_2_28 + CIBW_ENVIRONMENT_LINUX: 'CFLAGS="-Wl,-strip-all" CXXFLAGS="-Wl,-strip-all" PATH="$HOME/.cargo/bin:$PATH"' + CIBW_ENVIRONMENT_MACOS: 'CFLAGS="-I/usr/local/include -L/usr/local/lib" CXXFLAGS="-I/usr/local/include -L/usr/local/lib" LDFLAGS="-L/usr/local/lib"' - - name: Make source distribution - shell: bash - run: | - cd packages/vaex-core && python setup.py sdist + - name: Upload release assets + if: github.event_name == 'release' + uses: softprops/action-gh-release@v0.1.15 + with: + files: packages/vaex-core/dist/* - - name: Copy build artifacts - shell: bash + - uses: actions/setup-python@v5 + with: + python-version: "3.x" + + - name: Make source distribution run: | - cp packages/vaex-core/dist/* dist/ + pip install setuptools + python setup.py sdist + working-directory: packages/vaex-core/ - - uses: actions/upload-artifact@v1 + - uses: actions/upload-artifact@v4 with: - name: distributions - path: ./dist + name: distribution-${{ matrix.only }} + path: packages/vaex-core/dist - name: Publish a Python distribution to PyPI + if: startsWith(github.ref, 'refs/tags/core') env: TWINE_USERNAME: __token__ TWINE_PASSWORD: ${{ secrets.pypi_password_vaex_core }} run: | + pip install twine wheel openssl sha256 dist/* - twine upload --skip-existing dist/vaex?core* + twine upload --skip-existing + working-directory: packages/vaex-core/ diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000000..471f074fe8 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,36 @@ +--- +# .readthedocs.yml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Set the OS, Python version and other tools you might need +build: + os: ubuntu-22.04 + tools: + python: '3.12' + +# Build documentation in the docs/ directory with Sphinx +sphinx: + configuration: docs/source/conf.py + +# Build documentation with MkDocs +#mkdocs: +# configuration: mkdocs.yml + +# Optionally build your docs in additional formats such as PDF and ePub +formats: all + +# Pull submodules +submodules: + include: all + recursive: true + +# Optional but recommended, declare the Python requirements required +# to build your documentation +# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html +python: + install: + - requirements: requirements_rtd.txt diff --git a/bin/webveax b/bin/webveax index 43146b6a99..0a694002e1 100644 --- a/bin/webveax +++ b/bin/webveax @@ -12,10 +12,10 @@ import sys import time import itertools import platform +from importlib.machinery import SourceFileLoader import vaex.dataset -import imp #import matplotlib @@ -57,7 +57,7 @@ custom_module = None for path in path_list: if os.path.exists(path): print "loading", path - custom_module = imp.load_source('vaex.web.custom', path) + custom_module = SourceFileLoader('vaex.web.custom', path).load_module() else: print >>sys.stderr, path, "does not exist" if custom_module: @@ -244,4 +244,4 @@ def api_density_xy(name, x, y=None): if __name__ == '__main__': #flask.url_for('static', filename='test.html') app.run(host='0.0.0.0') - #api_density_xy("Aq-A-2-999-shuffled-1percent", "x", "y") \ No newline at end of file + #api_density_xy("Aq-A-2-999-shuffled-1percent", "x", "y") diff --git a/ci/05-run-notebooks.sh b/ci/05-run-notebooks.sh index ccbd1bcab7..ba404e9673 100755 --- a/ci/05-run-notebooks.sh +++ b/ci/05-run-notebooks.sh @@ -22,8 +22,8 @@ cd docs/source python -m nbconvert --TagRemovePreprocessor.remove_cell_tags="('skip-ci',)" --to html --execute guides/io.ipynb --ExecutePreprocessor.timeout=240 python -m nbconvert --TagRemovePreprocessor.remove_cell_tags="('skip-ci',)" --to html --execute guides/ml_iris.ipynb --ExecutePreprocessor.timeout=240 python -m nbconvert --TagRemovePreprocessor.remove_cell_tags="('skip-ci',)" --to html --execute guides/ml_titanic.ipynb --ExecutePreprocessor.timeout=240 - python -m nbconvert --TagRemovePreprocessor.remove_cell_tags="('skip-ci',)" --to html --execute example_jupyter_plotly.ipynb --ExecutePreprocessor.timeout=240 - python -m nbconvert --TagRemovePreprocessor.remove_cell_tags="('skip-ci',)" --to html --execute example_jupyter_ipyvolume.ipynb --ExecutePreprocessor.timeout=240 +# python -m nbconvert --TagRemovePreprocessor.remove_cell_tags="('skip-ci',)" --to html --execute example_jupyter_plotly.ipynb --ExecutePreprocessor.timeout=240 +# python -m nbconvert --TagRemovePreprocessor.remove_cell_tags="('skip-ci',)" --to html --execute example_jupyter_ipyvolume.ipynb --ExecutePreprocessor.timeout=240 # this does not seem to work on osx: # # make sure the ~/.ipython dir exists because multiple processes might try to create it diff --git a/ci/conda-env.yml b/ci/conda-env.yml index 635349147b..6be1ce303e 100644 --- a/ci/conda-env.yml +++ b/ci/conda-env.yml @@ -10,6 +10,7 @@ dependencies: - cython - cachetools - catboost +- dask<2024.2.0 - diskcache - filelock - fsspec<2022.2.0 @@ -20,12 +21,11 @@ dependencies: - h5py - httpx # for testing with starlette/fastapi - ipyvolume=0.6.0a6 -- lightgbm - matplotlib-base - nest-asyncio<1.5.2 - notebook - numba -- numpy<1.21 +- numpy<2 - pandas - pcre - pip==20.3.4 @@ -34,7 +34,7 @@ dependencies: - pyarrow>=5.0.0 - pyqt - pytest -- pytest-asyncio<0.14 +- pytest-asyncio - pytest-mpl - pytest-timeout - python-graphviz @@ -46,11 +46,12 @@ dependencies: - scipy - tabulate - tornado -- uvicorn<0.16 -- xarray<2022.6.0 +- uvicorn +- xarray # currently not using this, since the test that requires this is flakey # - myst-parser<0.18 # 0.18 breaks our test, missing main - pytz # for some reason, pip has trouble resolving this, and the libraries below - python-utils - progressbar2 - zipp<3.16.0 +- lightgbm>=4.0.0 diff --git a/docs/source/conf.py b/docs/source/conf.py index be71f4a956..f1545fdacd 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -89,7 +89,7 @@ print("failed finding vaex module, try finding version") import sys import os - import imp + from importlib.machinery import SourceFileLoader def system(cmd): print("Executing: ", cmd) @@ -102,7 +102,7 @@ def system(cmd): if not os.path.exists(path_version_file): system("version=`git describe --tags --long`; python/vaex/vaex/setversion.py ${version}") - version = imp.load_source('version', path_version_file) + version = SourceFileLoader('version', path_version_file).load_module() # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -174,9 +174,6 @@ def system(cmd): extra_navbar = "

dsadsa

" -html_sidebars = { - "**": ["sbt-sidebar-footer.html", "sbt-sidebar-nav.html", "sidebar-search-bs.html"] -} html_logo = "_static/logo-grey.svg" html_favicon = "_static/vaex_alt.png" html_baseurl = "https://vaex.io/docs/" diff --git a/docs/source/datasets.ipynb b/docs/source/datasets.ipynb index dc9774db35..9795f60f6d 100644 --- a/docs/source/datasets.ipynb +++ b/docs/source/datasets.ipynb @@ -31,13 +31,13 @@ "One can also stream the data directly from S3. Only the data that is necessary will be streamed, and it will cached locally:\n", "```\n", "import vaex\n", - "df = vaex.open('s3://vaex/taxi/yellow_taxi_2015_f32s.hdf5?anon=true')\n", + "df = vaex.open('s3://vaex/taxi/nyc_taxi_2015_mini.hdf5?anon=true')\n", "```" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2020-06-04T13:46:26.924698Z", diff --git a/docs/source/tutorial.ipynb b/docs/source/tutorial.ipynb index 0772357c5e..039fea5f97 100644 --- a/docs/source/tutorial.ipynb +++ b/docs/source/tutorial.ipynb @@ -799,7 +799,7 @@ ], "source": [ "# Read in the NYC Taxi dataset straight from S3\n", - "nyctaxi = vaex.open('s3://vaex/taxi/yellow_taxi_2009_2015_f32.hdf5?anon=true')\n", + "nyctaxi = vaex.open('s3://vaex/taxi/nyc_taxi_2015_mini.hdf5?anon=true')\n", "nyctaxi.head(5)" ] }, diff --git a/packages/vaex-arrow/setup.py b/packages/vaex-arrow/setup.py index d12bf8b7c0..5cd9f033b7 100644 --- a/packages/vaex-arrow/setup.py +++ b/packages/vaex-arrow/setup.py @@ -1,10 +1,10 @@ import os -import imp from setuptools import setup +from importlib.machinery import SourceFileLoader dirname = os.path.dirname(__file__) path_version = os.path.join(dirname, "vaex_arrow/_version.py") -version = imp.load_source('version', path_version) +version = SourceFileLoader('version', path_version).load_module() name = 'vaex' diff --git a/packages/vaex-astro/setup.py b/packages/vaex-astro/setup.py index 7556cda2fa..26006116e3 100644 --- a/packages/vaex-astro/setup.py +++ b/packages/vaex-astro/setup.py @@ -1,10 +1,10 @@ import os -import imp from setuptools import setup +from importlib.machinery import SourceFileLoader dirname = os.path.dirname(__file__) path_version = os.path.join(dirname, 'vaex/astro/_version.py') -version = imp.load_source('version', path_version) +version = SourceFileLoader('version', path_version).load_module() name = 'vaex' author = 'Maarten A. Breddels' diff --git a/packages/vaex-contrib/setup.py b/packages/vaex-contrib/setup.py index 2aba0a1c3f..61edc62354 100644 --- a/packages/vaex-contrib/setup.py +++ b/packages/vaex-contrib/setup.py @@ -1,10 +1,10 @@ import os -import imp +from importlib.machinery import SourceFileLoader from setuptools import setup dirname = os.path.dirname(__file__) path_version = os.path.join(dirname, 'vaex/contrib/_version.py') -version = imp.load_source('version', path_version) +version = SourceFileLoader('version', path_version).load_module() name = 'vaex' license = 'MIT' diff --git a/packages/vaex-core/pyproject.toml b/packages/vaex-core/pyproject.toml index 64ee8280fb..df5666945b 100644 --- a/packages/vaex-core/pyproject.toml +++ b/packages/vaex-core/pyproject.toml @@ -1,8 +1,6 @@ [build-system] # Minimum requirements for the build system to execute. requires = [ - "setuptools", - "wheel", "oldest-supported-numpy", "scikit-build", "cmake", diff --git a/packages/vaex-core/setup.py b/packages/vaex-core/setup.py index b5c98c18ca..cba3be7472 100644 --- a/packages/vaex-core/setup.py +++ b/packages/vaex-core/setup.py @@ -1,7 +1,7 @@ from setuptools import setup import sys import os -import imp +from importlib.machinery import SourceFileLoader from setuptools import Extension import platform @@ -15,7 +15,7 @@ dirname = os.path.dirname(__file__) path_version = os.path.join(dirname, "vaex/core/_version.py") -version = imp.load_source('version', path_version) +version = SourceFileLoader('version', path_version).load_module() name = 'vaex' author = "Maarten A. Breddels" @@ -26,7 +26,8 @@ # TODO: can we do without requests and progressbar2? # TODO: after python2 supports frops, future and futures can also be dropped # TODO: would be nice to have astropy only as dep in vaex-astro -install_requires_core = ["numpy>=1.16", "aplus", "tabulate>=0.8.3", +setup_requires = ["numpy~=1.16"] +install_requires_core = ["numpy~=1.16", "aplus", "tabulate>=0.8.3", "future>=0.15.2", "pyyaml", "progressbar2", "requests", "six", "cloudpickle", "pandas", "dask!=2022.4.0", "nest-asyncio>=1.3.3", "pyarrow>=5.0.0", "frozendict!=2.2.0", @@ -175,7 +176,7 @@ def __str__(self): url=url, author=author, author_email=author_email, - setup_requires=['numpy'], + setup_requires=setup_requires, install_requires=install_requires_core, license=license, package_data={'vaex': dll_files + ['test/files/*.fits', 'test/files/*.vot', 'test/files/*.hdf5']}, @@ -183,12 +184,12 @@ def __str__(self): include_package_data=True, ext_modules=([extension_vaexfast] if on_rtd else [extension_vaexfast, extension_strings, extension_superutils, extension_superagg]) if not use_skbuild else [], zip_safe=False, + python_requires=">=3.8", classifiers=[ - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", ], extras_require={ 'all': ["gcsfs>=0.6.2", "s3fs"] diff --git a/packages/vaex-core/src/hash_object.cpp b/packages/vaex-core/src/hash_object.cpp index dfd99c8db4..a86287cc3a 100644 --- a/packages/vaex-core/src/hash_object.cpp +++ b/packages/vaex-core/src/hash_object.cpp @@ -13,15 +13,6 @@ typedef SSIZE_T ssize_t; namespace py = pybind11; -namespace std { - template<> - struct hash { - size_t operator()(const PyObject *const &o) const { - return PyObject_Hash((PyObject*)o); - } - }; -} - namespace vaex { struct CompareObjects diff --git a/packages/vaex-core/src/superstring.hpp b/packages/vaex-core/src/superstring.hpp index b62d492bc2..924be203bc 100644 --- a/packages/vaex-core/src/superstring.hpp +++ b/packages/vaex-core/src/superstring.hpp @@ -3,6 +3,7 @@ #include #include +#include typedef nonstd::string_view string_view; typedef std::string string; #include diff --git a/packages/vaex-core/vaex/__init__.py b/packages/vaex-core/vaex/__init__.py index 4190eb9f9e..5724d9b938 100644 --- a/packages/vaex-core/vaex/__init__.py +++ b/packages/vaex-core/vaex/__init__.py @@ -155,8 +155,8 @@ def open(path, convert=False, progress=None, shuffle=False, fs_options={}, fs=No Examples: - >>> df = vaex.open('s3://vaex/taxi/yellow_taxi_2015_f32s.hdf5', fs_options={{'anonymous': True}}) - >>> df = vaex.open('s3://vaex/taxi/yellow_taxi_2015_f32s.hdf5?anon=true') + >>> df = vaex.open('s3://vaex/taxi/nyc_taxi_2015_mini.hdf5?anon=true', fs_options={{'anonymous': True}}) + >>> df = vaex.open('s3://vaex/taxi/nyc_taxi_2015_mini.hdf5?anon=true?anon=true') >>> df = vaex.open('s3://mybucket/path/to/file.hdf5', fs_options={{'access_key': my_key, 'secret_key': my_secret_key}}) >>> df = vaex.open(f's3://mybucket/path/to/file.hdf5?access_key={{my_key}}&secret_key={{my_secret_key}}') >>> df = vaex.open('s3://mybucket/path/to/file.hdf5?profile=myproject') diff --git a/packages/vaex-core/vaex/expression.py b/packages/vaex-core/vaex/expression.py index 3e36e42dea..a795a17000 100644 --- a/packages/vaex-core/vaex/expression.py +++ b/packages/vaex-core/vaex/expression.py @@ -1138,7 +1138,7 @@ def jit_pythran(self, verbose=False): if not verbose: logger.setLevel(logging.ERROR) import pythran - import imp + from importlib.machinery import SourceFileLoader import hashlib # self._import_all(module) names = [] @@ -1165,7 +1165,7 @@ def f({0}): # print(m.hexdigest()) module_path = pythran.compile_pythrancode(module_name, code, extra_compile_args=["-DBOOST_SIMD", "-march=native"] + [] if verbose else ["-w"]) - module = imp.load_dynamic(module_name, module_path) + module = SourceFileLoader(module_name, module_path).load_module() function_name = "f_" + m.hexdigest() function = self.ds.add_function(function_name, module.f, unique=True) diff --git a/packages/vaex-core/vaex/test/dataset.py b/packages/vaex-core/vaex/test/dataset.py index 8c4a57b79d..acfd5a9cff 100644 --- a/packages/vaex-core/vaex/test/dataset.py +++ b/packages/vaex-core/vaex/test/dataset.py @@ -1213,6 +1213,10 @@ def test_export_sorted_arrow(self): np.testing.assert_array_equal(self.dataset.data.x[self.zero_index:self.zero_index+10], np.array(ds2.data.x)[::-1]) def test_export(self): + if vx.utils.osname == "windows": + # we hit https://github.com/h5py/h5py/issues/2346 on windows + # possible due to changes in GHA? + return path = path_hdf5 = tempfile.mktemp(".hdf5") path_fits = tempfile.mktemp(".fits") path_fits_astropy = tempfile.mktemp(".fits") diff --git a/packages/vaex-core/vendor/pybind11 b/packages/vaex-core/vendor/pybind11 index 25abf7efba..a500f439d0 160000 --- a/packages/vaex-core/vendor/pybind11 +++ b/packages/vaex-core/vendor/pybind11 @@ -1 +1 @@ -Subproject commit 25abf7efba0b2990f5a6dfb0a31bc65c0f2f4d17 +Subproject commit a500f439d06d220ee2c680cdd2c8828eac8e7dfc diff --git a/packages/vaex-graphql/setup.py b/packages/vaex-graphql/setup.py index c0e73d3162..8a6e8a0860 100644 --- a/packages/vaex-graphql/setup.py +++ b/packages/vaex-graphql/setup.py @@ -1,10 +1,10 @@ import os -import imp +from importlib.machinery import SourceFileLoader from setuptools import setup dirname = os.path.dirname(__file__) path_version = os.path.join(dirname, 'vaex/graphql/_version.py') -version = imp.load_source('version', path_version) +version = SourceFileLoader('version', path_version).load_module() name = 'vaex' author = 'Maarten A. Breddels' diff --git a/packages/vaex-hdf5/setup.py b/packages/vaex-hdf5/setup.py index 83c0fb74d9..b21f664df5 100644 --- a/packages/vaex-hdf5/setup.py +++ b/packages/vaex-hdf5/setup.py @@ -1,11 +1,11 @@ import os -import imp +from importlib.machinery import SourceFileLoader from setuptools import setup from setuptools import Extension dirname = os.path.dirname(__file__) path_version = os.path.join(dirname, "vaex/hdf5/_version.py") -version = imp.load_source('version', path_version) +version = SourceFileLoader('version', path_version).load_module() name = 'vaex' author = "Maarten A. Breddels" diff --git a/packages/vaex-jupyter/setup.py b/packages/vaex-jupyter/setup.py index 14a88ab5f6..467f4b375b 100644 --- a/packages/vaex-jupyter/setup.py +++ b/packages/vaex-jupyter/setup.py @@ -1,11 +1,11 @@ import os -import imp +from importlib.machinery import SourceFileLoader from setuptools import setup from setuptools import Extension dirname = os.path.dirname(__file__) path_version = os.path.join(dirname, 'vaex/jupyter/_version.py') -version = imp.load_source('version', path_version) +version = SourceFileLoader('version', path_version).load_module() name = 'vaex' diff --git a/packages/vaex-meta/setup.py b/packages/vaex-meta/setup.py index 407578e97b..7b103103a0 100644 --- a/packages/vaex-meta/setup.py +++ b/packages/vaex-meta/setup.py @@ -1,11 +1,11 @@ import os -import imp +from importlib.machinery import SourceFileLoader from setuptools import setup from setuptools import Extension dirname = os.path.dirname(__file__) path_version = os.path.join(dirname, 'vaex/meta/_version.py') -version = imp.load_source('version', path_version) +version = SourceFileLoader('version', path_version).load_module() name = 'vaex' diff --git a/packages/vaex-ml/setup.py b/packages/vaex-ml/setup.py index e2a19be820..07e76a82ea 100644 --- a/packages/vaex-ml/setup.py +++ b/packages/vaex-ml/setup.py @@ -1,10 +1,10 @@ import os -import imp +from importlib.machinery import SourceFileLoader from setuptools import setup dirname = os.path.dirname(__file__) path_version = os.path.join(dirname, 'vaex/ml/_version.py') -version = imp.load_source('version', path_version) +version = SourceFileLoader('version', path_version).load_module() name = 'vaex' author = 'Jovan Veljanoski' diff --git a/packages/vaex-ml/vaex/ml/lightgbm.py b/packages/vaex-ml/vaex/ml/lightgbm.py index 5fb83ea458..4dbec9d42f 100644 --- a/packages/vaex-ml/vaex/ml/lightgbm.py +++ b/packages/vaex-ml/vaex/ml/lightgbm.py @@ -86,7 +86,7 @@ def transform(self, df): copy.add_virtual_column(self.prediction_name, expression, unique=False) return copy - def fit(self, df, valid_sets=None, valid_names=None, early_stopping_rounds=None, evals_result=None, verbose_eval=None, **kwargs): + def fit(self, df, valid_sets=None, valid_names=None, early_stopping_rounds=None, evals_result=None, verbose_eval=False, **kwargs): """Fit the LightGBMModel to the DataFrame. The model will train until the validation score stops improving. @@ -112,14 +112,19 @@ def fit(self, df, valid_sets=None, valid_names=None, early_stopping_rounds=None, else: valid_sets = () + callbacks = [ + lightgbm.callback.record_evaluation(eval_result=evals_result) if evals_result is not None else None, + lightgbm.callback.early_stopping(stopping_rounds=early_stopping_rounds) if early_stopping_rounds else None, + lightgbm.callback.log_evaluation() if verbose_eval else None + ] + callbacks = [callback for callback in callbacks if callback is not None] + self.booster = lightgbm.train(params=self.params, train_set=dtrain, num_boost_round=self.num_boost_round, valid_sets=valid_sets, valid_names=valid_names, - early_stopping_rounds=early_stopping_rounds, - evals_result=evals_result, - verbose_eval=verbose_eval, + callbacks=callbacks, **kwargs) def predict(self, df, **kwargs): diff --git a/packages/vaex-server/setup.py b/packages/vaex-server/setup.py index 2470e88b5e..37916d450c 100644 --- a/packages/vaex-server/setup.py +++ b/packages/vaex-server/setup.py @@ -1,11 +1,11 @@ import os -import imp +from importlib.machinery import SourceFileLoader from setuptools import setup from setuptools import Extension dirname = os.path.dirname(__file__) path_version = os.path.join(dirname, 'vaex/server/_version.py') -version = imp.load_source('version', path_version) +version = SourceFileLoader('version', path_version).load_module() name = 'vaex' diff --git a/packages/vaex-ui/setup.py b/packages/vaex-ui/setup.py index 09ec130b08..401942eb6f 100644 --- a/packages/vaex-ui/setup.py +++ b/packages/vaex-ui/setup.py @@ -1,11 +1,11 @@ import os -import imp +from importlib.machinery import SourceFileLoader from setuptools import setup from setuptools import Extension dirname = os.path.dirname(__file__) path_version = os.path.join(dirname, "vaex/ui/_version.py") -version = imp.load_source('version', path_version) +version = SourceFileLoader('version', path_version).load_module() name = 'vaex' author = "Maarten A. Breddels" diff --git a/packages/vaex-ui/vaex/ui/main.py b/packages/vaex-ui/vaex/ui/main.py index a1d44cb119..83716cf023 100644 --- a/packages/vaex-ui/vaex/ui/main.py +++ b/packages/vaex-ui/vaex/ui/main.py @@ -76,7 +76,7 @@ def getfilesystemencoding_wrapper(): # import subspacefind # import ctypes -import imp +from importlib.machinery import SourceFileLoader import logging logger = logging.getLogger("vaex") @@ -92,7 +92,7 @@ def getfilesystemencoding_wrapper(): custompath = path = os.path.expanduser('~/.vaex/custom.py') # print path if os.path.exists(path): - customModule = imp.load_source('vaex.custom', path) + customModule = SourceFileLoader('vaex.custom', path).load_module() # custom = customModule.Custom() else: custom = None @@ -1164,7 +1164,7 @@ def do(ignore=None, dim=dim, N=N, power=power, name=name): logger.debug("plugin file: %s" % path) filename = os.path.basename(path) name = os.path.splitext(filename)[0] - imp.load_source('vaexuser.plugin.' + name, path) + SourceFileLoader('vaexuser.plugin.' + name, path).load_module() self.open_generators = [] # for reference counts self.action_open_hdf5_gadget.triggered.connect(self.openGenerator(self.gadgethdf5, "Gadget HDF5 file", "*.hdf5")) diff --git a/packages/vaex-viz/setup.py b/packages/vaex-viz/setup.py index d824616093..e803574a0a 100644 --- a/packages/vaex-viz/setup.py +++ b/packages/vaex-viz/setup.py @@ -1,10 +1,10 @@ import os -import imp +from importlib.machinery import SourceFileLoader from setuptools import setup dirname = os.path.dirname(__file__) path_version = os.path.join(dirname, "vaex/viz/_version.py") -version = imp.load_source('version', path_version) +version = SourceFileLoader('version', path_version).load_module() name = 'vaex' diff --git a/py2app.py b/py2app.py index 4abc694cd8..c740d49b72 100644 --- a/py2app.py +++ b/py2app.py @@ -12,7 +12,7 @@ from setuptools import setup, Extension import platform import sys -import imp +from importlib.machinery import SourceFileLoader def system(cmd): print("Executing: ", cmd) @@ -25,7 +25,7 @@ def system(cmd): if not os.path.exists(path_version_file): system("version=`git describe --tags --long`; vaex/setversion.py ${version}") -version = imp.load_source('version', path_version_file) +version = SourceFileLoader('version', path_version_file).load_module() #system("version=`git describe --tags --long`; vaex/vaex/setversion.py ${version}") diff --git a/requirements-ml.txt b/requirements-ml.txt index fde5372d8f..b4ab89ed63 100644 --- a/requirements-ml.txt +++ b/requirements-ml.txt @@ -1,7 +1,7 @@ annoy scikit-learn xgboost -lightgbm +lightgbm>4 catboost # tensorflow==2.1.0 # tensorflow-io==0.12.0 diff --git a/requirements_rtd.txt b/requirements_rtd.txt index b4334c1d42..edff735a07 100644 --- a/requirements_rtd.txt +++ b/requirements_rtd.txt @@ -1,4 +1,6 @@ -sphinx-book-theme==0.0.39b1 +-e . +sphinx +sphinx-book-theme sphinx-sitemap sphinxcontrib-googleanalytics sphinxext-rediraffe @@ -24,4 +26,3 @@ xgboost sphinx_gallery catboost myst-parser -sphinx==4.2.0 diff --git a/tests/export_test.py b/tests/export_test.py index 41c40f6299..12a6c96cd3 100644 --- a/tests/export_test.py +++ b/tests/export_test.py @@ -194,7 +194,7 @@ def test_export_unicode_column_name_hdf5(tmpdir): # assert df[0:2].s.tolist() == ['x', 'x'] # assert df[-3:-1].s.tolist() == ['y', 'y'] -@pytest.mark.parametrize("dtypes", [{}, {'name': np.object, 'age': 'Int64', 'weight': np.float}]) +@pytest.mark.parametrize("dtypes", [{}, {'name': object, 'age': 'Int64', 'weight': np.float64}]) def test_multi_file_naive_read_convert_export(tmpdir, dtypes): current_dir = os.path.dirname(__file__) @@ -236,7 +236,7 @@ def test_export_csv(df_local, tmpdir): assert '123456' in vaex.open(path) -@pytest.mark.parametrize("dtypes", [{}, {'name': np.object, 'age': 'Int64', 'weight': np.float}]) +@pytest.mark.parametrize("dtypes", [{}, {'name': object, 'age': 'Int64', 'weight': np.float64}]) def test_export_generates_same_hdf5_shasum(tmpdir, dtypes): current_dir = os.path.dirname(__file__) diff --git a/tests/ml/lightgbm_test.py b/tests/ml/lightgbm_test.py index f3aa6f7cf0..87ff935848 100644 --- a/tests/ml/lightgbm_test.py +++ b/tests/ml/lightgbm_test.py @@ -1,5 +1,8 @@ import sys import pytest +# py 36 and 37 give a syntax error +if sys.version_info < (3, 8): + pytest.skip(allow_module_level=True) pytest.importorskip("lightgbm") import numpy as np @@ -132,6 +135,7 @@ def test_lightgbm_validation_set(df_example): @pytest.mark.skipif(sys.version_info < (3, 6), reason="requires python3.6 or higher") +@pytest.mark.skipif(vaex.utils.osname == 'osx', reason="hangs for unknown reason on osx") def test_lightgbm_pipeline(df_example): ds = df_example # train test splot diff --git a/tests/ml/sklearn_test.py b/tests/ml/sklearn_test.py index 6e18efa6c4..96830aeccd 100644 --- a/tests/ml/sklearn_test.py +++ b/tests/ml/sklearn_test.py @@ -1,6 +1,11 @@ import platform - +import sys import pytest +# py 36 and 37 not supported +if sys.version_info < (3, 8): + pytest.skip(allow_module_level=True) + + import vaex pytest.importorskip("sklearn") from vaex.ml.sklearn import Predictor, IncrementalPredictor @@ -206,7 +211,7 @@ def test_sklearn_incremental_predictor_classification(prediction_type, df_iris_1 features = df_train.column_names[:4] target = 'class_' - incremental = IncrementalPredictor(model=SGDClassifier(loss='log', learning_rate='constant', eta0=0.01), + incremental = IncrementalPredictor(model=SGDClassifier(loss='log_loss', learning_rate='constant', eta0=0.01), features=features, target=target, batch_size=10_000, diff --git a/tests/ml/xgboost_test.py b/tests/ml/xgboost_test.py index 9a1a5dd371..c9c264db98 100644 --- a/tests/ml/xgboost_test.py +++ b/tests/ml/xgboost_test.py @@ -17,7 +17,6 @@ 'objective': 'multi:softmax', # learning task objective 'num_class': 3, # number of target classes (if classification) 'random_state': 42, # fixes the seed, for reproducibility - 'silent': 1, # silent mode 'n_jobs': -1 # cpu cores used } @@ -32,14 +31,13 @@ 'min_child_weight': 1, # minimum sum of instance weight (hessian) needed in a child 'objective': 'reg:linear', # learning task objective 'random_state': 42, # fixes the seed, for reproducibility - 'silent': 1, # silent mode 'n_jobs': -1 # cpu cores used } def test_xgboost(df_iris): ds = df_iris - ds_train, ds_test = ds.ml.train_test_split(test_size=0.2, verbose=False) + ds_train, ds_test = ds.ml.train_test_split(test_size=0.1, verbose=False) features = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width'] booster = vaex.ml.xgboost.XGBoostModel(num_boost_round=10, params=params_multiclass, @@ -104,7 +102,6 @@ def test_xgboost_validation_set(df_example): # fit the booster - including saving the history of the validation sets booster.fit(train, evals=[(train, 'train'), (test, 'test')], early_stopping_rounds=2, evals_result=history) - assert booster.booster.best_ntree_limit == 10 assert booster.booster.best_iteration == 9 assert len(history['train']['rmse']) == 10 assert len(history['test']['rmse']) == 10