From f9103949ec79bb57367b7c1157ff820439eae08e Mon Sep 17 00:00:00 2001 From: awxkee Date: Tue, 8 Oct 2024 20:32:07 +0100 Subject: [PATCH] Runtime dispatch, added Smpte428, rayon multithreading --- .github/workflows/build_push.yml | 10 +- .github/workflows/publish_release.yml | 8 +- Cargo.lock | 3 +- Cargo.toml | 9 +- LICENSE-APACHE.md | 201 +++++++++++++++ LICENSE-BSD.md | 26 -- LICENSE.md | 227 ++--------------- README.md | 11 +- src/app/Cargo.toml | 2 +- src/app/src/main.rs | 14 +- src/gamma_curves.rs | 20 ++ src/hsv_to_image.rs | 186 ++++++++++---- src/image_to_hsv.rs | 201 ++++++++++----- src/image_to_jzazbz.rs | 238 ++++++++++++------ src/image_to_lalphabeta.rs | 142 +++++++---- src/image_to_linear.rs | 181 +++++++++----- src/image_to_linear_u8.rs | 107 +++++--- src/image_to_oklab.rs | 204 +++++++++++----- src/image_to_sigmoidal.rs | 153 ++++++++---- src/image_to_xyz_lab.rs | 339 +++++++++++++++++++------- src/image_xyza_laba.rs | 245 +++++++++++++------ src/jzazbz_to_image.rs | 174 +++++++++---- src/lalphabeta_to_image.rs | 116 ++++++--- src/linear_to_image.rs | 202 ++++++++++----- src/linear_to_image_u8.rs | 165 ++++++++----- src/linear_to_planar.rs | 115 ++++++--- src/neon/gamma_curves.rs | 44 ++-- src/neon/linear_to_planar.rs | 40 ++- src/neon/planar_to_linear.rs | 31 +-- src/neon/to_linear_u8.rs | 61 +++-- src/oklab_to_image.rs | 182 +++++++++----- src/planar_to_linear.rs | 106 +++++--- src/sigmoidal_to_image.rs | 148 +++++++---- src/sse/image_to_jzazbz.rs | 55 ++++- src/sse/image_to_linear_u8.rs | 48 +++- src/sse/image_to_oklab.rs | 4 +- src/sse/xyz_lab_to_image.rs | 4 +- src/sse/xyza_laba_to_image.rs | 3 +- src/xyz.rs | 7 +- src/xyz_lab_to_image.rs | 286 +++++++++++++++++----- src/xyza_laba_to_image.rs | 193 ++++++++++----- 41 files changed, 3069 insertions(+), 1442 deletions(-) create mode 100644 LICENSE-APACHE.md delete mode 100644 LICENSE-BSD.md diff --git a/.github/workflows/build_push.yml b/.github/workflows/build_push.yml index 743d420..f0c9029 100644 --- a/.github/workflows/build_push.yml +++ b/.github/workflows/build_push.yml @@ -24,11 +24,11 @@ jobs: - uses: actions/checkout@v4 - uses: actions-rust-lang/setup-rust-toolchain@v1 - run: rustup target add aarch64-unknown-linux-gnu x86_64-unknown-linux-gnu i686-unknown-linux-gnu powerpc-unknown-linux-gnu - - run: RUSTFLAGS="-C target-feature=+neon" cargo build --target aarch64-unknown-linux-gnu - - run: RUSTFLAGS="-C target-feature=+sse4.1" cargo build --target i686-unknown-linux-gnu - - run: RUSTFLAGS="-C target-feature=+avx2" cargo build --target i686-unknown-linux-gnu + - run: RUSTFLAGS="-C target-feature=+neon" cargo build --all-features --target aarch64-unknown-linux-gnu + - run: RUSTFLAGS="-C target-feature=+sse4.1" cargo build --all-features --target i686-unknown-linux-gnu + - run: RUSTFLAGS="-C target-feature=+avx2" cargo build --all-features --target i686-unknown-linux-gnu - run: cargo build --target powerpc-unknown-linux-gnu - - run: RUSTFLAGS="-C target-feature=+sse4.1" cargo build --target x86_64-unknown-linux-gnu - - run: RUSTFLAGS="-C target-feature=+avx2" cargo build --target x86_64-unknown-linux-gnu + - run: RUSTFLAGS="-C target-feature=+sse4.1" cargo build --all-features --target x86_64-unknown-linux-gnu + - run: RUSTFLAGS="-C target-feature=+avx2" cargo build --all-features --target x86_64-unknown-linux-gnu - name: Test release pipeline run: cargo publish --dry-run \ No newline at end of file diff --git a/.github/workflows/publish_release.yml b/.github/workflows/publish_release.yml index 1d59ef7..7d1ab25 
100644 --- a/.github/workflows/publish_release.yml +++ b/.github/workflows/publish_release.yml @@ -18,10 +18,10 @@ jobs: - uses: actions/checkout@v4 - uses: actions-rust-lang/setup-rust-toolchain@v1 - run: rustup target add aarch64-unknown-linux-gnu x86_64-unknown-linux-gnu i686-unknown-linux-gnu powerpc-unknown-linux-gnu - - run: RUSTFLAGS="-C target-feature=+neon" cargo build --target aarch64-unknown-linux-gnu - - run: RUSTFLAGS="-C target-feature=+sse4.1" cargo build --target i686-unknown-linux-gnu - - run: cargo build --target powerpc-unknown-linux-gnu - - run: RUSTFLAGS="-C target-feature=+sse4.1" cargo build --target x86_64-unknown-linux-gnu + - run: RUSTFLAGS="-C target-feature=+neon" cargo build --all-features --target aarch64-unknown-linux-gnu + - run: RUSTFLAGS="-C target-feature=+sse4.1" cargo build --all-features --target i686-unknown-linux-gnu + - run: cargo build --target powerpc-unknown-linux-gnu --all-features + - run: RUSTFLAGS="-C target-feature=+sse4.1" cargo build --all-features --target x86_64-unknown-linux-gnu - name: Make a release env: CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_TOKEN }} diff --git a/Cargo.lock b/Cargo.lock index 204c76d..ae486dc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -170,11 +170,12 @@ checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b" [[package]] name = "colorutils-rs" -version = "0.5.12" +version = "0.6.0" dependencies = [ "erydanos", "half", "num-traits", + "rayon", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index d494285..bd9fe6b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,12 +2,12 @@ workspace = { members = ["src/app"] } [package] name = "colorutils-rs" -version = "0.5.12" +version = "0.6.0" edition = "2021" description = "High performance utilities for color format handling and conversion." readme = "README.md" keywords = ["lab", "hsv", "xyz", "color", "colorspace"] -license = "Apache-2.0 OR BSD-3-Clause" +license = "BSD-3-Clause OR Apache-2.0" authors = ["Radzivon Bartoshyk"] documentation = "https://github.com/awxkee/colorutils-rs" categories = ["multimedia::images", "multimedia::video"] @@ -19,7 +19,8 @@ exclude = ["*.jpg"] erydanos = "0.2.15" half = "2.4.1" num-traits = "0.2.19" +rayon = { version = "1.10.0", optional = true } [features] -default = [] -rayon = [] \ No newline at end of file +default = ["rayon"] +rayon = ["dep:rayon"] diff --git a/LICENSE-APACHE.md b/LICENSE-APACHE.md new file mode 100644 index 0000000..86a13a8 --- /dev/null +++ b/LICENSE-APACHE.md @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2024 Radzivon Bartoshyk + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/LICENSE-BSD.md b/LICENSE-BSD.md deleted file mode 100644 index bf616fd..0000000 --- a/LICENSE-BSD.md +++ /dev/null @@ -1,26 +0,0 @@ -Copyright (c) Radzivon Bartoshyk. All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -1. 
Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/LICENSE.md b/LICENSE.md index 86a13a8..bf616fd 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,201 +1,26 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. 
For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2024 Radzivon Bartoshyk - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. +Copyright (c) Radzivon Bartoshyk. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/README.md b/README.md index 2764072..920328a 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Rust utilities for color handling and conversion. -## The goal is to provide support for common conversion and SIMD options for most common conversion path for high-performance +The goal is to provide support for common conversion and SIMD options for most common conversion path for high-performance Available SIMD fast paths generally 5-10 times faster than naive implementations @@ -34,4 +34,11 @@ receive any benefits. Also, `fma` target feature for x86-64 is available. -Target feature at compile time `+avx2` must be activated to properly compile avx2 instructions. This is an important step even when runtime dispatch are used. \ No newline at end of file +Target feature at compile time `+avx2` must be activated to properly compile avx2 instructions. This is an important step even when runtime dispatch are used. + +This project is licensed under either of + +- BSD-3-Clause License (see [LICENSE](LICENSE.md)) +- Apache License, Version 2.0 (see [LICENSE](LICENSE-APACHE.md)) + +at your option. diff --git a/src/app/Cargo.toml b/src/app/Cargo.toml index 5613ff1..11d8558 100644 --- a/src/app/Cargo.toml +++ b/src/app/Cargo.toml @@ -4,6 +4,6 @@ version = "0.1.0" edition = "2021" [dependencies] -colorutils-rs = { path = "../../" } +colorutils-rs = { path = "../../", features = ["rayon"] } image = "0.25.1" okhsl = "1.0.1" \ No newline at end of file diff --git a/src/app/src/main.rs b/src/app/src/main.rs index 684c936..df5efad 100644 --- a/src/app/src/main.rs +++ b/src/app/src/main.rs @@ -39,6 +39,7 @@ fn main() { println!("dimensions {:?}", img.dimensions()); println!("{:?}", img.color()); + // let img = img.to_rgba8(); let mut src_bytes = img.as_bytes(); let width = dimensions.0; let height = dimensions.1; @@ -57,8 +58,7 @@ fn main() { // ); // src_bytes = &dst_rgba; - let mut dst_slice: Vec = Vec::new(); - dst_slice.resize(width as usize * components * height as usize, 0u8); + let mut dst_slice: Vec = vec![0u8; width as usize * components * height as usize]; { let mut lab_store: Vec = vec![]; @@ -66,14 +66,15 @@ fn main() { lab_store.resize(width as usize * components * height as usize, 0.); let src_stride = width * components as u32; let start_time = Instant::now(); - rgb_to_linear( + rgb_to_jzazbz( src_bytes, src_stride, &mut lab_store, store_stride as u32, width, height, - TransferFunction::Srgb, + 200., + TransferFunction::Smpte428, ); let elapsed_time = start_time.elapsed(); // Print the elapsed time in milliseconds @@ -101,14 +102,15 @@ fn main() { // } let start_time = Instant::now(); - linear_to_rgb( + jzazbz_to_rgb( &lab_store, store_stride as u32, &mut dst_slice, src_stride, width, height, - TransferFunction::Srgb, + 200., + TransferFunction::Smpte428, ); let elapsed_time = start_time.elapsed(); diff --git a/src/gamma_curves.rs b/src/gamma_curves.rs index b264511..01324cb 100644 --- a/src/gamma_curves.rs +++ b/src/gamma_curves.rs @@ 
-61,6 +61,20 @@ pub fn rec709_from_linear(linear: f32) -> f32 { } } +#[inline] +/// Linear transfer function for Smpte 428 +pub fn smpte428_to_linear(gamma: f32) -> f32 { + const SCALE: f32 = 1. / 0.91655527974030934f32; + gamma.max(0.).powf(2.6f32) * SCALE +} + +#[inline] +/// Gamma transfer function for Smpte 428 +pub fn smpte428_from_linear(linear: f32) -> f32 { + const POWER_VALUE: f32 = 1.0f32 / 2.6f32; + (0.91655527974030934f32 * linear.max(0.)).powf(POWER_VALUE) +} + #[inline(always)] /// Pure gamma transfer function for gamma 2.2 pub fn pure_gamma_function(x: f32, gamma: f32) -> f32 { @@ -109,15 +123,19 @@ pub enum TransferFunction { Gamma2p2, /// Pure gamma 2.8 Transfer function Gamma2p8, + /// Smpte 428 Transfer function + Smpte428, } impl From<u8> for TransferFunction { + #[inline] fn from(value: u8) -> Self { match value { 0 => TransferFunction::Srgb, 1 => TransferFunction::Rec709, 2 => TransferFunction::Gamma2p2, 3 => TransferFunction::Gamma2p8, + 4 => TransferFunction::Smpte428, _ => TransferFunction::Srgb, } } @@ -131,6 +149,7 @@ impl TransferFunction { TransferFunction::Rec709 => rec709_to_linear(v), TransferFunction::Gamma2p8 => gamma2p8_to_linear(v), TransferFunction::Gamma2p2 => gamma2p2_to_linear(v), + TransferFunction::Smpte428 => smpte428_to_linear(v), } } @@ -141,6 +160,7 @@ impl TransferFunction { TransferFunction::Rec709 => rec709_from_linear(v), TransferFunction::Gamma2p2 => gamma2p2_from_linear(v), TransferFunction::Gamma2p8 => gamma2p8_from_linear(v), + TransferFunction::Smpte428 => smpte428_from_linear(v), } } } diff --git a/src/hsv_to_image.rs b/src/hsv_to_image.rs index 6ccfc4f..0863b7f 100644 --- a/src/hsv_to_image.rs +++ b/src/hsv_to_image.rs @@ -14,6 +14,10 @@ use crate::neon::neon_hsv_u16_to_image; #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] use crate::sse::sse_hsv_u16_to_image; use crate::{Hsl, Hsv}; +#[cfg(feature = "rayon")] +use rayon::iter::{IndexedParallelIterator, ParallelIterator}; +#[cfg(feature = "rayon")] +use rayon::prelude::{ParallelSlice, ParallelSliceMut}; #[allow(clippy::type_complexity)] fn hsv_u16_to_channels< @@ -50,77 +54,151 @@ fn hsv_u16_to_channels< Some(neon_hsv_u16_to_image::); } - let mut src_offset = 0usize; - let mut dst_offset = 0usize; - let channels = image_configuration.get_channels_count(); let scale = 1f32 / scale; - for _ in 0..height as usize { - let mut _cx = 0usize; - - if let Some(dispatcher) = _wide_row_handler { - unsafe { - _cx = dispatcher( - _cx, - src.as_ptr(), - src_offset, - width, - dst.as_mut_ptr(), - dst_offset, - scale, - ); - } - } + #[cfg(feature = "rayon")] + { + let src_slice_safe_align = unsafe { + slice::from_raw_parts( + src.as_ptr() as *const u8, + src_stride as usize * height as usize, + ) + }; + dst.par_chunks_exact_mut(dst_stride as usize) + .zip(src_slice_safe_align.par_chunks_exact(src_stride as usize)) + .for_each(|(dst, src)| unsafe { + let mut _cx = 0usize; + + if let Some(dispatcher) = _wide_row_handler { + _cx = dispatcher( + _cx, + src.as_ptr() as *const u16, + 0, + width, + dst.as_mut_ptr(), + 0, + scale, + ); + } - let src_ptr = unsafe { (src.as_ptr() as *const u8).add(src_offset) as *const u16 }; - let dst_ptr = unsafe { dst.as_mut_ptr().add(dst_offset) }; + let src_ptr = src.as_ptr() as *const u16; + let dst_ptr = dst.as_mut_ptr(); - let dst_slice = unsafe { slice::from_raw_parts_mut(dst_ptr, width as usize * channels) }; + let dst_slice = slice::from_raw_parts_mut(dst_ptr, width as usize * channels); - for x in _cx..width as usize { - let px = x * channels; - let src =
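The SMPTE 428 pair added in `src/gamma_curves.rs` above is a plain power law: decoding raises the code value to the power 2.6 and divides by 0.9166, and encoding inverts that. A minimal standalone sketch (the two functions are copied from the hunk above; the `main` harness is only for illustration) that sanity-checks the round trip:

```rust
fn smpte428_to_linear(gamma: f32) -> f32 {
    // Normalisation constant as used in the patch above.
    const SCALE: f32 = 1. / 0.91655527974030934f32;
    gamma.max(0.).powf(2.6f32) * SCALE
}

fn smpte428_from_linear(linear: f32) -> f32 {
    const POWER_VALUE: f32 = 1.0f32 / 2.6f32;
    (0.91655527974030934f32 * linear.max(0.)).powf(POWER_VALUE)
}

fn main() {
    // Encoding after decoding should return the original code value.
    for v in [0.0f32, 0.18, 0.5, 1.0] {
        let roundtrip = smpte428_from_linear(smpte428_to_linear(v));
        assert!((roundtrip - v).abs() < 1e-5);
    }
}
```

For this round trip to hold, the from-linear match arm in `TransferFunction` has to dispatch to `smpte428_from_linear`, not `smpte428_to_linear`.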
unsafe { src_ptr.add(px) }; - let h = unsafe { src.read_unaligned() }; - let s = unsafe { src.add(1).read_unaligned() }; - let v = unsafe { src.add(2).read_unaligned() }; + for x in _cx..width as usize { + let px = x * channels; + let src = src_ptr.add(px); + let h = src.read_unaligned(); + let s = src.add(1).read_unaligned(); + let v = src.add(2).read_unaligned(); - let s_f = s as f32 * scale; - let v_f = v as f32 * scale; + let s_f = s as f32 * scale; + let v_f = v as f32 * scale; - let hx = x * channels; - let rgb = match target { - HsvTarget::Hsv => { - let hsv = Hsv::from_components(h as f32, s_f, v_f); - hsv.to_rgb8() + let hx = x * channels; + let rgb = match target { + HsvTarget::Hsv => { + let hsv = Hsv::from_components(h as f32, s_f, v_f); + hsv.to_rgb8() + } + HsvTarget::Hsl => { + let hsl = Hsl::from_components(h as f32, s_f, v_f); + hsl.to_rgb8() + } + }; + + *dst_slice.get_unchecked_mut(hx + image_configuration.get_r_channel_offset()) = + rgb.r; + *dst_slice.get_unchecked_mut(hx + image_configuration.get_g_channel_offset()) = + rgb.g; + *dst_slice.get_unchecked_mut(hx + image_configuration.get_b_channel_offset()) = + rgb.b; + + if image_configuration.has_alpha() { + let a = src.add(3).read_unaligned(); + *dst_slice + .get_unchecked_mut(hx + image_configuration.get_a_channel_offset()) = + a as u8; + } } - HsvTarget::Hsl => { - let hsl = Hsl::from_components(h as f32, s_f, v_f); - hsl.to_rgb8() + }); + } + + #[cfg(not(feature = "rayon"))] + { + let mut src_offset = 0usize; + let mut dst_offset = 0usize; + + for _ in 0..height as usize { + let mut _cx = 0usize; + + if let Some(dispatcher) = _wide_row_handler { + unsafe { + _cx = dispatcher( + _cx, + src.as_ptr(), + src_offset, + width, + dst.as_mut_ptr(), + dst_offset, + scale, + ); } - }; - - unsafe { - *dst_slice.get_unchecked_mut(hx + image_configuration.get_r_channel_offset()) = - rgb.r; - *dst_slice.get_unchecked_mut(hx + image_configuration.get_g_channel_offset()) = - rgb.g; - *dst_slice.get_unchecked_mut(hx + image_configuration.get_b_channel_offset()) = - rgb.b; } - if image_configuration.has_alpha() { - let a = unsafe { src.add(3).read_unaligned() }; + let src_ptr = unsafe { (src.as_ptr() as *const u8).add(src_offset) as *const u16 }; + let dst_ptr = unsafe { dst.as_mut_ptr().add(dst_offset) }; + + let dst_slice = + unsafe { slice::from_raw_parts_mut(dst_ptr, width as usize * channels) }; + + for x in _cx..width as usize { + let px = x * channels; + let src = unsafe { src_ptr.add(px) }; + let h = unsafe { src.read_unaligned() }; + let s = unsafe { src.add(1).read_unaligned() }; + let v = unsafe { src.add(2).read_unaligned() }; + + let s_f = s as f32 * scale; + let v_f = v as f32 * scale; + + let hx = x * channels; + let rgb = match target { + HsvTarget::Hsv => { + let hsv = Hsv::from_components(h as f32, s_f, v_f); + hsv.to_rgb8() + } + HsvTarget::Hsl => { + let hsl = Hsl::from_components(h as f32, s_f, v_f); + hsl.to_rgb8() + } + }; + unsafe { - *dst_slice.get_unchecked_mut(hx + image_configuration.get_a_channel_offset()) = - a as u8; + *dst_slice.get_unchecked_mut(hx + image_configuration.get_r_channel_offset()) = + rgb.r; + *dst_slice.get_unchecked_mut(hx + image_configuration.get_g_channel_offset()) = + rgb.g; + *dst_slice.get_unchecked_mut(hx + image_configuration.get_b_channel_offset()) = + rgb.b; + } + + if image_configuration.has_alpha() { + let a = unsafe { src.add(3).read_unaligned() }; + unsafe { + *dst_slice + .get_unchecked_mut(hx + image_configuration.get_a_channel_offset()) = + a as u8; + } } } - } - 
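Every conversion touched by this patch parallelizes the same way: the destination buffer is split into whole rows with `par_chunks_exact_mut`, zipped against the matching source rows, and each row pair is handed to a rayon task, so no two tasks ever write the same memory. A minimal sketch of that shape, with a trivial per-pixel operation standing in for the real color conversion and SIMD row dispatcher:

```rust
use rayon::prelude::*;

/// Invert an image, processing one row per rayon task.
/// Strides and `row_bytes` are byte counts, mirroring the crate's convention.
fn invert_rows(src: &[u8], src_stride: usize, dst: &mut [u8], dst_stride: usize, row_bytes: usize) {
    dst.par_chunks_exact_mut(dst_stride)
        .zip(src.par_chunks_exact(src_stride))
        .for_each(|(dst_row, src_row)| {
            for (d, s) in dst_row[..row_bytes].iter_mut().zip(&src_row[..row_bytes]) {
                *d = 255 - *s;
            }
        });
}
```

When the `rayon` feature is disabled, the `#[cfg(not(feature = "rayon"))]` branch keeps the original sequential `src_offset`/`dst_offset` walk, so single-threaded behaviour is unchanged.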
src_offset += src_stride as usize; - dst_offset += dst_stride as usize; + src_offset += src_stride as usize; + dst_offset += dst_stride as usize; + } } } diff --git a/src/image_to_hsv.rs b/src/image_to_hsv.rs index 1bb1939..3bc614c 100644 --- a/src/image_to_hsv.rs +++ b/src/image_to_hsv.rs @@ -12,6 +12,12 @@ use crate::neon::neon_channels_to_hsv_u16; #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] use crate::sse::sse_channels_to_hsv_u16; use crate::Rgb; +#[cfg(feature = "rayon")] +use rayon::iter::{IndexedParallelIterator, ParallelIterator}; +#[cfg(feature = "rayon")] +use rayon::prelude::{ParallelSlice, ParallelSliceMut}; +#[cfg(feature = "rayon")] +use std::slice; #[allow(clippy::type_complexity)] fn channels_to_hsv_u16< @@ -49,82 +55,157 @@ fn channels_to_hsv_u16< Some(sse_channels_to_hsv_u16::); } - let mut src_offset = 0usize; - let mut dst_offset = 0usize; - let channels = image_configuration.get_channels_count(); - for _ in 0..height as usize { - let mut _cx = 0usize; + #[cfg(feature = "rayon")] + { + let dst_slice_safe_align = unsafe { + slice::from_raw_parts_mut( + dst.as_mut_ptr() as *mut u8, + dst_stride as usize * height as usize, + ) + }; + dst_slice_safe_align + .par_chunks_exact_mut(dst_stride as usize) + .zip(src.par_chunks_exact(src_stride as usize)) + .for_each(|(dst, src)| unsafe { + let mut _cx = 0usize; - if let Some(dispatcher) = _wide_row_handler { - unsafe { - _cx = dispatcher( - _cx, - src.as_ptr(), - src_offset, - width, - dst.as_mut_ptr(), - dst_offset, - scale, - ) - } - } + if let Some(dispatcher) = _wide_row_handler { + _cx = dispatcher( + _cx, + src.as_ptr(), + 0, + width, + dst.as_mut_ptr() as *mut u16, + 0, + scale, + ); + } - let src_ptr = unsafe { src.as_ptr().add(src_offset) }; - let dst_ptr = unsafe { (dst.as_mut_ptr() as *mut u8).add(dst_offset) as *mut u16 }; + let src_ptr = src.as_ptr(); + let dst_ptr = dst.as_mut_ptr() as *mut u16; - for x in _cx..width as usize { - let px = x * channels; - let src = unsafe { src_ptr.add(px) }; - let r = unsafe { - src.add(image_configuration.get_r_channel_offset()) - .read_unaligned() - }; - let g = unsafe { - src.add(image_configuration.get_g_channel_offset()) - .read_unaligned() - }; - let b = unsafe { - src.add(image_configuration.get_b_channel_offset()) - .read_unaligned() - }; + for x in _cx..width as usize { + let px = x * channels; + let src = src_ptr.add(px); + let r = src + .add(image_configuration.get_r_channel_offset()) + .read_unaligned(); + let g = src + .add(image_configuration.get_g_channel_offset()) + .read_unaligned(); + let b = src + .add(image_configuration.get_b_channel_offset()) + .read_unaligned(); - let rgb = Rgb::::new(r, g, b); - let hx = x * channels; - let dst = unsafe { dst_ptr.add(hx) }; - match target { - HsvTarget::Hsv => { - let hsv = rgb.to_hsv(); - unsafe { - dst.write_unaligned(hsv.h as u16); - dst.add(1).write_unaligned((hsv.s * scale).round() as u16); - dst.add(2).write_unaligned((hsv.v * scale).round() as u16); + let rgb = Rgb::::new(r, g, b); + let hx = x * channels; + let dst = dst_ptr.add(hx); + match target { + HsvTarget::Hsv => { + let hsv = rgb.to_hsv(); + + dst.write_unaligned(hsv.h as u16); + dst.add(1).write_unaligned((hsv.s * scale).round() as u16); + dst.add(2).write_unaligned((hsv.v * scale).round() as u16); + } + HsvTarget::Hsl => { + let hsl = rgb.to_hsl(); + + dst.write_unaligned(hsl.h as u16); + dst.add(1).write_unaligned((hsl.s * scale).round() as u16); + dst.add(2).write_unaligned((hsl.l * scale).round() as u16); + } } - } - HsvTarget::Hsl => { - let 
hsl = rgb.to_hsl(); - unsafe { - dst.write_unaligned(hsl.h as u16); - dst.add(1).write_unaligned((hsl.s * scale).round() as u16); - dst.add(2).write_unaligned((hsl.l * scale).round() as u16); + + if image_configuration.has_alpha() { + let a = src + .add(image_configuration.get_a_channel_offset()) + .read_unaligned(); + dst.add(3).write_unaligned(a as u16); } } + }); + } + + #[cfg(not(feature = "rayon"))] + { + let mut src_offset = 0usize; + let mut dst_offset = 0usize; + + for _ in 0..height as usize { + let mut _cx = 0usize; + + if let Some(dispatcher) = _wide_row_handler { + unsafe { + _cx = dispatcher( + _cx, + src.as_ptr(), + src_offset, + width, + dst.as_mut_ptr(), + dst_offset, + scale, + ) + } } - if image_configuration.has_alpha() { - let a = unsafe { - src.add(image_configuration.get_a_channel_offset()) + let src_ptr = unsafe { src.as_ptr().add(src_offset) }; + let dst_ptr = unsafe { (dst.as_mut_ptr() as *mut u8).add(dst_offset) as *mut u16 }; + + for x in _cx..width as usize { + let px = x * channels; + let src = unsafe { src_ptr.add(px) }; + let r = unsafe { + src.add(image_configuration.get_r_channel_offset()) .read_unaligned() }; - unsafe { - dst.add(3).write_unaligned(a as u16); + let g = unsafe { + src.add(image_configuration.get_g_channel_offset()) + .read_unaligned() + }; + let b = unsafe { + src.add(image_configuration.get_b_channel_offset()) + .read_unaligned() + }; + + let rgb = Rgb::::new(r, g, b); + let hx = x * channels; + let dst = unsafe { dst_ptr.add(hx) }; + match target { + HsvTarget::Hsv => { + let hsv = rgb.to_hsv(); + unsafe { + dst.write_unaligned(hsv.h as u16); + dst.add(1).write_unaligned((hsv.s * scale).round() as u16); + dst.add(2).write_unaligned((hsv.v * scale).round() as u16); + } + } + HsvTarget::Hsl => { + let hsl = rgb.to_hsl(); + unsafe { + dst.write_unaligned(hsl.h as u16); + dst.add(1).write_unaligned((hsl.s * scale).round() as u16); + dst.add(2).write_unaligned((hsl.l * scale).round() as u16); + } + } + } + + if image_configuration.has_alpha() { + let a = unsafe { + src.add(image_configuration.get_a_channel_offset()) + .read_unaligned() + }; + unsafe { + dst.add(3).write_unaligned(a as u16); + } } } - } - src_offset += src_stride as usize; - dst_offset += dst_stride as usize; + src_offset += src_stride as usize; + dst_offset += dst_stride as usize; + } } } diff --git a/src/image_to_jzazbz.rs b/src/image_to_jzazbz.rs index 241b5ad..dd7df02 100644 --- a/src/image_to_jzazbz.rs +++ b/src/image_to_jzazbz.rs @@ -10,6 +10,12 @@ use crate::neon::neon_image_to_jzazbz; #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] use crate::sse::sse_image_to_jzazbz; use crate::{Jzazbz, Jzczhz, Rgb, TransferFunction}; +#[cfg(feature = "rayon")] +use rayon::iter::{IndexedParallelIterator, ParallelIterator}; +#[cfg(feature = "rayon")] +use rayon::prelude::{ParallelSlice, ParallelSliceMut}; +#[cfg(feature = "rayon")] +use std::slice; #[repr(u8)] #[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] @@ -60,86 +66,180 @@ fn channels_to_jzaz( _wide_row_handle = Some(sse_image_to_jzazbz::); } - let mut src_offset = 0usize; - let mut dst_offset = 0usize; - - for _ in 0..height as usize { - let mut _cx = 0usize; - - let src_ptr = unsafe { src.as_ptr().add(src_offset) }; - let dst_ptr = unsafe { (dst.as_mut_ptr() as *mut u8).add(dst_offset) as *mut f32 }; - - if let Some(dispatcher) = _wide_row_handle { - unsafe { - _cx = dispatcher( - _cx, - src.as_ptr(), - src_offset, - width, - dst.as_mut_ptr(), - dst_offset, - display_luminance, - transfer_function, - ); - } - } + 
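One detail worth noting in these parallel branches: the strides are byte counts, while several destination buffers are `u16` or `f32` slices. The conversions therefore view the destination as raw bytes, chunk it by the byte stride, and cast each row pointer back to the element type inside the closure. A sketch of that reinterpretation in isolation (hypothetical helper; it assumes the stride is a multiple of the element size and that the buffer really spans `height` full rows, as the callers here arrange):

```rust
use rayon::prelude::*;
use std::slice;

/// Fill an f32 image addressed by a byte stride, one rayon task per row.
fn fill_rows(dst: &mut [f32], dst_stride_bytes: usize, width: usize, height: usize, value: f32) {
    // View the f32 buffer as bytes so rows can be split on the byte stride.
    let dst_bytes = unsafe {
        slice::from_raw_parts_mut(dst.as_mut_ptr() as *mut u8, dst_stride_bytes * height)
    };
    dst_bytes
        .par_chunks_exact_mut(dst_stride_bytes)
        .for_each(|row| {
            // Each row starts at a multiple of the stride from an f32-aligned
            // base, so the cast back stays aligned when the stride is a
            // multiple of 4.
            let row_f32 =
                unsafe { slice::from_raw_parts_mut(row.as_mut_ptr() as *mut f32, width) };
            for px in row_f32 {
                *px = value;
            }
        });
}
```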
#[cfg(feature = "rayon")] + { + let dst_slice_safe_align = unsafe { + slice::from_raw_parts_mut( + dst.as_mut_ptr() as *mut u8, + dst_stride as usize * height as usize, + ) + }; + dst_slice_safe_align + .par_chunks_exact_mut(dst_stride as usize) + .zip(src.par_chunks_exact(src_stride as usize)) + .for_each(|(dst, src)| unsafe { + let mut _cx = 0usize; - for x in _cx..width as usize { - let px = x * channels; - - let src = unsafe { src_ptr.add(px) }; - let r = unsafe { - src.add(image_configuration.get_r_channel_offset()) - .read_unaligned() - }; - let g = unsafe { - src.add(image_configuration.get_g_channel_offset()) - .read_unaligned() - }; - let b = unsafe { - src.add(image_configuration.get_b_channel_offset()) - .read_unaligned() - }; - - let rgb = Rgb::::new(r, g, b); - - let dst_store = unsafe { dst_ptr.add(px) }; - - match target { - JzazbzTarget::Jzazbz => { - let jzazbz = - Jzazbz::from_rgb_with_luminance(rgb, display_luminance, transfer_function); - unsafe { - dst_store.write_unaligned(jzazbz.jz); - dst_store.add(1).write_unaligned(jzazbz.az); - dst_store.add(2).write_unaligned(jzazbz.bz); - } + let src_ptr = src.as_ptr(); + let dst_ptr = dst.as_mut_ptr() as *mut f32; + + if let Some(dispatcher) = _wide_row_handle { + _cx = dispatcher( + _cx, + src.as_ptr(), + 0, + width, + dst.as_mut_ptr() as *mut f32, + 0, + display_luminance, + transfer_function, + ); } - JzazbzTarget::Jzczhz => { - let jzczhz = - Jzczhz::from_rgb_with_luminance(rgb, display_luminance, transfer_function); - unsafe { - dst_store.write_unaligned(jzczhz.jz); - dst_store.add(1).write_unaligned(jzczhz.cz); - dst_store.add(2).write_unaligned(jzczhz.hz); + + for x in _cx..width as usize { + let px = x * channels; + + let src = src_ptr.add(px); + let r = src + .add(image_configuration.get_r_channel_offset()) + .read_unaligned(); + let g = src + .add(image_configuration.get_g_channel_offset()) + .read_unaligned(); + let b = src + .add(image_configuration.get_b_channel_offset()) + .read_unaligned(); + + let rgb = Rgb::::new(r, g, b); + + let dst_store = dst_ptr.add(px); + + match target { + JzazbzTarget::Jzazbz => { + let jzazbz = Jzazbz::from_rgb_with_luminance( + rgb, + display_luminance, + transfer_function, + ); + + dst_store.write_unaligned(jzazbz.jz); + dst_store.add(1).write_unaligned(jzazbz.az); + dst_store.add(2).write_unaligned(jzazbz.bz); + } + JzazbzTarget::Jzczhz => { + let jzczhz = Jzczhz::from_rgb_with_luminance( + rgb, + display_luminance, + transfer_function, + ); + + dst_store.write_unaligned(jzczhz.jz); + dst_store.add(1).write_unaligned(jzczhz.cz); + dst_store.add(2).write_unaligned(jzczhz.hz); + } + } + + if image_configuration.has_alpha() { + let a = src + .add(image_configuration.get_a_channel_offset()) + .read_unaligned(); + let a_lin = a as f32 * (1f32 / 255f32); + + dst_store.add(3).write_unaligned(a_lin); } } + }); + } + + #[cfg(not(feature = "rayon"))] + { + let mut src_offset = 0usize; + let mut dst_offset = 0usize; + + for _ in 0..height as usize { + let mut _cx = 0usize; + + let src_ptr = unsafe { src.as_ptr().add(src_offset) }; + let dst_ptr = unsafe { (dst.as_mut_ptr() as *mut u8).add(dst_offset) as *mut f32 }; + + if let Some(dispatcher) = _wide_row_handle { + unsafe { + _cx = dispatcher( + _cx, + src.as_ptr(), + src_offset, + width, + dst.as_mut_ptr(), + dst_offset, + display_luminance, + transfer_function, + ); + } } - if image_configuration.has_alpha() { - let a = unsafe { - src.add(image_configuration.get_a_channel_offset()) + for x in _cx..width as usize { + let px = x * channels; + 
+ let src = unsafe { src_ptr.add(px) }; + let r = unsafe { + src.add(image_configuration.get_r_channel_offset()) .read_unaligned() }; - let a_lin = a as f32 * (1f32 / 255f32); - unsafe { - dst_store.add(3).write_unaligned(a_lin); + let g = unsafe { + src.add(image_configuration.get_g_channel_offset()) + .read_unaligned() + }; + let b = unsafe { + src.add(image_configuration.get_b_channel_offset()) + .read_unaligned() + }; + + let rgb = Rgb::::new(r, g, b); + + let dst_store = unsafe { dst_ptr.add(px) }; + + match target { + JzazbzTarget::Jzazbz => { + let jzazbz = Jzazbz::from_rgb_with_luminance( + rgb, + display_luminance, + transfer_function, + ); + unsafe { + dst_store.write_unaligned(jzazbz.jz); + dst_store.add(1).write_unaligned(jzazbz.az); + dst_store.add(2).write_unaligned(jzazbz.bz); + } + } + JzazbzTarget::Jzczhz => { + let jzczhz = Jzczhz::from_rgb_with_luminance( + rgb, + display_luminance, + transfer_function, + ); + unsafe { + dst_store.write_unaligned(jzczhz.jz); + dst_store.add(1).write_unaligned(jzczhz.cz); + dst_store.add(2).write_unaligned(jzczhz.hz); + } + } + } + + if image_configuration.has_alpha() { + let a = unsafe { + src.add(image_configuration.get_a_channel_offset()) + .read_unaligned() + }; + let a_lin = a as f32 * (1f32 / 255f32); + unsafe { + dst_store.add(3).write_unaligned(a_lin); + } } } - } - src_offset += src_stride as usize; - dst_offset += dst_stride as usize; + src_offset += src_stride as usize; + dst_offset += dst_stride as usize; + } } } diff --git a/src/image_to_lalphabeta.rs b/src/image_to_lalphabeta.rs index 40b7533..b59e30b 100644 --- a/src/image_to_lalphabeta.rs +++ b/src/image_to_lalphabeta.rs @@ -6,6 +6,12 @@ */ use crate::image::ImageConfiguration; use crate::{Rgb, TransferFunction}; +#[cfg(feature = "rayon")] +use rayon::iter::{IndexedParallelIterator, ParallelIterator}; +#[cfg(feature = "rayon")] +use rayon::prelude::{ParallelSlice, ParallelSliceMut}; +#[cfg(feature = "rayon")] +use std::slice; #[inline(always)] fn channels_to_lalphabeta( @@ -21,55 +27,107 @@ fn channels_to_lalphabeta( let channels = image_configuration.get_channels_count(); - let mut src_offset = 0usize; - let mut dst_offset = 0usize; - - for _ in 0..height as usize { - let mut _cx = 0usize; - - let src_ptr = unsafe { src.as_ptr().add(src_offset) }; - let dst_ptr = unsafe { (dst.as_mut_ptr() as *mut u8).add(dst_offset) as *mut f32 }; - - for x in _cx..width as usize { - let px = x * channels; - - let src = unsafe { src_ptr.add(px) }; - let r = unsafe { - src.add(image_configuration.get_r_channel_offset()) - .read_unaligned() - }; - let g = unsafe { - src.add(image_configuration.get_g_channel_offset()) - .read_unaligned() - }; - let b = unsafe { - src.add(image_configuration.get_b_channel_offset()) - .read_unaligned() - }; - - let rgb = Rgb::::new(r, g, b); - let dst_store = unsafe { dst_ptr.add(px) }; - let lalphabeta = rgb.to_lalphabeta(transfer_function); - unsafe { - dst_store.write_unaligned(lalphabeta.l); - dst_store.add(1).write_unaligned(lalphabeta.alpha); - dst_store.add(2).write_unaligned(lalphabeta.beta); - } + #[cfg(feature = "rayon")] + { + let dst_slice_safe_align = unsafe { + slice::from_raw_parts_mut( + dst.as_mut_ptr() as *mut u8, + dst_stride as usize * height as usize, + ) + }; + dst_slice_safe_align + .par_chunks_exact_mut(dst_stride as usize) + .zip(src.par_chunks_exact(src_stride as usize)) + .for_each(|(dst, src)| unsafe { + let mut _cx = 0usize; + + let src_ptr = src.as_ptr(); + let dst_ptr = dst.as_mut_ptr() as *mut f32; + + for x in _cx..width as 
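For completeness, the example app earlier in this patch (`src/app/src/main.rs`) now drives exactly this Jzazbz path with the new arguments: a 200-nit display luminance and `TransferFunction::Smpte428`. A condensed sketch of that round trip; the argument order matches the `main.rs` hunk, while the crate-root imports and the f32 stride calculation are assumptions here:

```rust
use colorutils_rs::{jzazbz_to_rgb, rgb_to_jzazbz, TransferFunction};

/// Convert an RGB8 image to Jzazbz and back, as the example app does.
fn roundtrip_jzazbz(rgb: &[u8], width: u32, height: u32) -> Vec<u8> {
    let channels = 3u32;
    let src_stride = width * channels; // bytes per RGB8 row
    let store_stride = width * channels * core::mem::size_of::<f32>() as u32; // bytes per f32 row

    let mut jzazbz = vec![0f32; (width * channels * height) as usize];
    let mut restored = vec![0u8; (width * channels * height) as usize];

    // RGB -> Jzazbz at 200 nits, using the new SMPTE 428 transfer function.
    rgb_to_jzazbz(
        rgb, src_stride, &mut jzazbz, store_stride,
        width, height, 200., TransferFunction::Smpte428,
    );
    // ...and back to RGB8.
    jzazbz_to_rgb(
        &jzazbz, store_stride, &mut restored, src_stride,
        width, height, 200., TransferFunction::Smpte428,
    );
    restored
}
```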
usize { + let px = x * channels; + + let src = src_ptr.add(px); + let r = src + .add(image_configuration.get_r_channel_offset()) + .read_unaligned(); + let g = src + .add(image_configuration.get_g_channel_offset()) + .read_unaligned(); + let b = src + .add(image_configuration.get_b_channel_offset()) + .read_unaligned(); + + let rgb = Rgb::::new(r, g, b); + let dst_store = dst_ptr.add(px); + let lalphabeta = rgb.to_lalphabeta(transfer_function); + dst_store.write_unaligned(lalphabeta.l); + dst_store.add(1).write_unaligned(lalphabeta.alpha); + dst_store.add(2).write_unaligned(lalphabeta.beta); + + if image_configuration.has_alpha() { + let a = src + .add(image_configuration.get_a_channel_offset()) + .read_unaligned(); + let a_lin = a as f32 * (1f32 / 255f32); + dst_store.add(3).write_unaligned(a_lin); + } + } + }); + } + + #[cfg(not(feature = "rayon"))] + { + let mut src_offset = 0usize; + let mut dst_offset = 0usize; + + for _ in 0..height as usize { + let mut _cx = 0usize; + + let src_ptr = unsafe { src.as_ptr().add(src_offset) }; + let dst_ptr = unsafe { (dst.as_mut_ptr() as *mut u8).add(dst_offset) as *mut f32 }; - if image_configuration.has_alpha() { - let a = unsafe { - src.add(image_configuration.get_a_channel_offset()) + for x in _cx..width as usize { + let px = x * channels; + + let src = unsafe { src_ptr.add(px) }; + let r = unsafe { + src.add(image_configuration.get_r_channel_offset()) + .read_unaligned() + }; + let g = unsafe { + src.add(image_configuration.get_g_channel_offset()) + .read_unaligned() + }; + let b = unsafe { + src.add(image_configuration.get_b_channel_offset()) .read_unaligned() }; - let a_lin = a as f32 * (1f32 / 255f32); + + let rgb = Rgb::::new(r, g, b); + let dst_store = unsafe { dst_ptr.add(px) }; + let lalphabeta = rgb.to_lalphabeta(transfer_function); unsafe { - dst_store.add(3).write_unaligned(a_lin); + dst_store.write_unaligned(lalphabeta.l); + dst_store.add(1).write_unaligned(lalphabeta.alpha); + dst_store.add(2).write_unaligned(lalphabeta.beta); + } + + if image_configuration.has_alpha() { + let a = unsafe { + src.add(image_configuration.get_a_channel_offset()) + .read_unaligned() + }; + let a_lin = a as f32 * (1f32 / 255f32); + unsafe { + dst_store.add(3).write_unaligned(a_lin); + } } } - } - src_offset += src_stride as usize; - dst_offset += dst_stride as usize; + src_offset += src_stride as usize; + dst_offset += dst_stride as usize; + } } } diff --git a/src/image_to_linear.rs b/src/image_to_linear.rs index f3acb35..62d19ec 100644 --- a/src/image_to_linear.rs +++ b/src/image_to_linear.rs @@ -4,7 +4,6 @@ * // Use of this source code is governed by a BSD-style * // license that can be found in the LICENSE file. 
*/ - #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] use crate::avx::avx_channels_to_linear; use crate::gamma_curves::TransferFunction; @@ -14,6 +13,12 @@ use crate::neon::neon_channels_to_linear; #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] use crate::sse::*; use crate::Rgb; +#[cfg(feature = "rayon")] +use rayon::iter::{IndexedParallelIterator, ParallelIterator}; +#[cfg(feature = "rayon")] +use rayon::prelude::{ParallelSlice, ParallelSliceMut}; +#[cfg(feature = "rayon")] +use std::slice; #[allow(clippy::type_complexity)] fn channels_to_linear( @@ -51,70 +56,136 @@ fn channels_to_linear( _wide_row_handle = Some(neon_channels_to_linear::); } - let mut src_offset = 0usize; - let mut dst_offset = 0usize; - for _ in 0..height as usize { - let mut _cx = 0usize; - - if let Some(dispatcher) = _wide_row_handle { - unsafe { - _cx = dispatcher( - _cx, - src.as_ptr(), - src_offset, - width, - dst.as_mut_ptr(), - dst_offset, - transfer_function, - ) - } - } + #[cfg(not(feature = "rayon"))] + { + let mut src_offset = 0usize; + let mut dst_offset = 0usize; + for _ in 0..height as usize { + let mut _cx = 0usize; - let src_ptr = unsafe { src.as_ptr().add(src_offset) }; - let dst_ptr = unsafe { (dst.as_mut_ptr() as *mut u8).add(dst_offset) as *mut f32 }; - - for x in _cx..width as usize { - let px = x * channels; - let dst = unsafe { dst_ptr.add(px) }; - let src = unsafe { src_ptr.add(px) }; - let r = unsafe { - src.add(image_configuration.get_r_channel_offset()) - .read_unaligned() - }; - let g = unsafe { - src.add(image_configuration.get_g_channel_offset()) - .read_unaligned() - }; - let b = unsafe { - src.add(image_configuration.get_b_channel_offset()) - .read_unaligned() - }; - - let rgb = Rgb::::new(r, g, b); - let rgb_f32 = rgb.to_rgb_f32(); - - unsafe { - dst.write_unaligned(transfer_function.linearize(rgb_f32.r)); - dst.add(1) - .write_unaligned(transfer_function.linearize(rgb_f32.g)); - dst.add(2) - .write_unaligned(transfer_function.linearize(rgb_f32.b)); + if let Some(dispatcher) = _wide_row_handle { + unsafe { + _cx = dispatcher( + _cx, + src.as_ptr(), + src_offset, + width, + dst.as_mut_ptr(), + dst_offset, + transfer_function, + ) + } } - if USE_ALPHA && image_configuration.has_alpha() { - let a = unsafe { - src.add(image_configuration.get_a_channel_offset()) + let src_ptr = unsafe { src.as_ptr().add(src_offset) }; + let dst_ptr = unsafe { (dst.as_mut_ptr() as *mut u8).add(dst_offset) as *mut f32 }; + + for x in _cx..width as usize { + let px = x * channels; + let dst = unsafe { dst_ptr.add(px) }; + let src = unsafe { src_ptr.add(px) }; + let r = unsafe { + src.add(image_configuration.get_r_channel_offset()) .read_unaligned() }; - let a_lin = a as f32 * (1f32 / 255f32); + let g = unsafe { + src.add(image_configuration.get_g_channel_offset()) + .read_unaligned() + }; + let b = unsafe { + src.add(image_configuration.get_b_channel_offset()) + .read_unaligned() + }; + + let rgb = Rgb::::new(r, g, b); + let rgb_f32 = rgb.to_rgb_f32(); + unsafe { - dst.add(3).write_unaligned(a_lin); + dst.write_unaligned(transfer_function.linearize(rgb_f32.r)); + dst.add(1) + .write_unaligned(transfer_function.linearize(rgb_f32.g)); + dst.add(2) + .write_unaligned(transfer_function.linearize(rgb_f32.b)); + } + + if USE_ALPHA && image_configuration.has_alpha() { + let a = unsafe { + src.add(image_configuration.get_a_channel_offset()) + .read_unaligned() + }; + let a_lin = a as f32 * (1f32 / 255f32); + unsafe { + dst.add(3).write_unaligned(a_lin); + } } } + + src_offset += src_stride as usize; + 
dst_offset += dst_stride as usize; } + } - src_offset += src_stride as usize; - dst_offset += dst_stride as usize; + #[cfg(feature = "rayon")] + { + let dst_slice_safe_align = unsafe { + slice::from_raw_parts_mut( + dst.as_mut_ptr() as *mut u8, + dst_stride as usize * height as usize, + ) + }; + dst_slice_safe_align + .par_chunks_exact_mut(dst_stride as usize) + .zip(src.par_chunks_exact(src_stride as usize)) + .for_each(|(dst_row, src_row)| unsafe { + let mut _cx = 0usize; + + if let Some(dispatcher) = _wide_row_handle { + _cx = dispatcher( + _cx, + src_row.as_ptr(), + 0, + width, + dst_row.as_mut_ptr() as *mut f32, + 0, + transfer_function, + ) + } + + let src_ptr = src_row.as_ptr(); + let dst_ptr = dst_row.as_mut_ptr() as *mut f32; + + for x in _cx..width as usize { + let px = x * channels; + let dst = dst_ptr.add(px); + let src = src_ptr.add(px); + let r = src + .add(image_configuration.get_r_channel_offset()) + .read_unaligned(); + let g = src + .add(image_configuration.get_g_channel_offset()) + .read_unaligned(); + let b = src + .add(image_configuration.get_b_channel_offset()) + .read_unaligned(); + + let rgb = Rgb::::new(r, g, b); + let rgb_f32 = rgb.to_rgb_f32(); + + dst.write_unaligned(transfer_function.linearize(rgb_f32.r)); + dst.add(1) + .write_unaligned(transfer_function.linearize(rgb_f32.g)); + dst.add(2) + .write_unaligned(transfer_function.linearize(rgb_f32.b)); + + if USE_ALPHA && image_configuration.has_alpha() { + let a = src + .add(image_configuration.get_a_channel_offset()) + .read_unaligned(); + let a_lin = a as f32 * (1f32 / 255f32); + dst.add(3).write_unaligned(a_lin); + } + } + }); } } diff --git a/src/image_to_linear_u8.rs b/src/image_to_linear_u8.rs index d4c6063..19e3502 100644 --- a/src/image_to_linear_u8.rs +++ b/src/image_to_linear_u8.rs @@ -4,9 +4,6 @@ * // Use of this source code is governed by a BSD-style * // license that can be found in the LICENSE file. 
*/ - -use std::slice; - use crate::gamma_curves::TransferFunction; use crate::image::ImageConfiguration; #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] @@ -14,15 +11,19 @@ use crate::neon::*; #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] use crate::sse::sse_image_to_linear_unsigned::sse_channels_to_linear_u8; use crate::Rgb; +#[cfg(feature = "rayon")] +use rayon::iter::{IndexedParallelIterator, ParallelIterator}; +#[cfg(feature = "rayon")] +use rayon::prelude::{ParallelSlice, ParallelSliceMut}; #[allow(clippy::type_complexity)] fn channels_to_linear( - src: &[u8], + l_src: &[u8], src_stride: u32, - dst: &mut [u8], + l_dst: &mut [u8], dst_stride: u32, width: u32, - height: u32, + _: u32, transfer_function: TransferFunction, ) { let image_configuration: ImageConfiguration = CHANNELS_CONFIGURATION.into(); @@ -30,9 +31,6 @@ fn channels_to_linear( panic!("Alpha may be set only on images with alpha"); } - let mut src_offset = 0usize; - let mut dst_offset = 0usize; - let channels = image_configuration.get_channels_count(); let mut _wide_row_handler: Option< @@ -51,41 +49,35 @@ fn channels_to_linear( Some(sse_channels_to_linear_u8::); } - for _ in 0..height as usize { + #[cfg(not(feature = "rayon"))] + for (dst_row, src_row) in l_dst + .chunks_exact_mut(dst_stride as usize) + .zip(l_src.chunks_exact(src_stride as usize)) + { let mut _cx = 0usize; if let Some(dispatcher) = _wide_row_handler { unsafe { _cx = dispatcher( _cx, - src.as_ptr(), - src_offset, + src_row.as_ptr(), + 0, width, - dst.as_mut_ptr(), - dst_offset, + dst_row.as_mut_ptr(), + 0, transfer_function, ) } } - let src_ptr = unsafe { src.as_ptr().add(src_offset) }; - let dst_ptr = unsafe { dst.as_mut_ptr().add(dst_offset) }; - - let src_slice = unsafe { slice::from_raw_parts(src_ptr, width as usize * channels) }; - let dst_slice: &mut [u8] = - unsafe { slice::from_raw_parts_mut(dst_ptr, width as usize * channels) }; - for x in _cx..width as usize { let px = x * channels; - let r = unsafe { - *src_slice.get_unchecked(px + image_configuration.get_r_channel_offset()) - }; - let g = unsafe { - *src_slice.get_unchecked(px + image_configuration.get_g_channel_offset()) - }; - let b = unsafe { - *src_slice.get_unchecked(px + image_configuration.get_b_channel_offset()) - }; + let r = + unsafe { *src_row.get_unchecked(px + image_configuration.get_r_channel_offset()) }; + let g = + unsafe { *src_row.get_unchecked(px + image_configuration.get_g_channel_offset()) }; + let b = + unsafe { *src_row.get_unchecked(px + image_configuration.get_b_channel_offset()) }; let rgb = Rgb::::new(r, g, b); let mut rgb_f32 = rgb.to_rgb_f32(); @@ -93,23 +85,64 @@ fn channels_to_linear( let rgb = rgb_f32.to_u8(); unsafe { - *dst_slice.get_unchecked_mut(px) = rgb.r; - *dst_slice.get_unchecked_mut(px + 1) = rgb.g; - *dst_slice.get_unchecked_mut(px + 2) = rgb.b; + *dst_row.get_unchecked_mut(px) = rgb.r; + *dst_row.get_unchecked_mut(px + 1) = rgb.g; + *dst_row.get_unchecked_mut(px + 2) = rgb.b; } if USE_ALPHA && image_configuration.has_alpha() { let a = unsafe { - *src_slice.get_unchecked(px + image_configuration.get_a_channel_offset()) + *src_row.get_unchecked(px + image_configuration.get_a_channel_offset()) }; unsafe { - *dst_slice.get_unchecked_mut(px + 3) = a; + *dst_row.get_unchecked_mut(px + 3) = a; } } } + } + + #[cfg(feature = "rayon")] + { + l_dst + .par_chunks_exact_mut(dst_stride as usize) + .zip(l_src.par_chunks_exact(src_stride as usize)) + .for_each(|(dst_row, src_row)| unsafe { + let mut _cx = 0usize; + + if let Some(dispatcher) = 
_wide_row_handler { + _cx = dispatcher( + _cx, + src_row.as_ptr(), + 0, + width, + dst_row.as_mut_ptr(), + 0, + transfer_function, + ) + } - src_offset += src_stride as usize; - dst_offset += dst_stride as usize; + for x in _cx..width as usize { + let px = x * channels; + let r = *src_row.get_unchecked(px + image_configuration.get_r_channel_offset()); + let g = *src_row.get_unchecked(px + image_configuration.get_g_channel_offset()); + let b = *src_row.get_unchecked(px + image_configuration.get_b_channel_offset()); + + let rgb = Rgb::::new(r, g, b); + let mut rgb_f32 = rgb.to_rgb_f32(); + rgb_f32 = rgb_f32.linearize(transfer_function); + let rgb = rgb_f32.to_u8(); + + *dst_row.get_unchecked_mut(px) = rgb.r; + *dst_row.get_unchecked_mut(px + 1) = rgb.g; + *dst_row.get_unchecked_mut(px + 2) = rgb.b; + + if USE_ALPHA && image_configuration.has_alpha() { + let a = + *src_row.get_unchecked(px + image_configuration.get_a_channel_offset()); + *dst_row.get_unchecked_mut(px + 3) = a; + } + } + }); } } diff --git a/src/image_to_oklab.rs b/src/image_to_oklab.rs index a61f752..25ceb67 100644 --- a/src/image_to_oklab.rs +++ b/src/image_to_oklab.rs @@ -13,6 +13,12 @@ use crate::oklch::Oklch; #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] use crate::sse::sse_image_to_oklab; use crate::{Oklab, Rgb, TransferFunction}; +#[cfg(feature = "rayon")] +use rayon::iter::{IndexedParallelIterator, ParallelIterator}; +#[cfg(feature = "rayon")] +use rayon::prelude::{ParallelSlice, ParallelSliceMut}; +#[cfg(feature = "rayon")] +use std::slice; #[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] pub(crate) enum OklabTarget { @@ -66,82 +72,150 @@ fn channels_to_oklab( _wide_row_handle = Some(avx_image_to_oklab::); } - let mut src_offset = 0usize; - let mut dst_offset = 0usize; - - for _ in 0..height as usize { - let mut _cx = 0usize; - - let src_ptr = unsafe { src.as_ptr().add(src_offset) }; - let dst_ptr = unsafe { (dst.as_mut_ptr() as *mut u8).add(dst_offset) as *mut f32 }; - - if let Some(dispatcher) = _wide_row_handle { - unsafe { - _cx = dispatcher( - _cx, - src.as_ptr(), - src_offset, - width, - dst.as_mut_ptr(), - dst_offset, - transfer_function, - ) - } - } + #[cfg(feature = "rayon")] + { + let dst_slice_safe_align = unsafe { + slice::from_raw_parts_mut( + dst.as_mut_ptr() as *mut u8, + dst_stride as usize * height as usize, + ) + }; - for x in _cx..width as usize { - let px = x * channels; - - let src = unsafe { src_ptr.add(px) }; - let r = unsafe { - src.add(image_configuration.get_r_channel_offset()) - .read_unaligned() - }; - let g = unsafe { - src.add(image_configuration.get_g_channel_offset()) - .read_unaligned() - }; - let b = unsafe { - src.add(image_configuration.get_b_channel_offset()) - .read_unaligned() - }; - - let rgb = Rgb::::new(r, g, b); - let dst_store = unsafe { dst_ptr.add(px) }; - - match target { - OklabTarget::Oklab => { - let oklab = Oklab::from_rgb(rgb, transfer_function); - unsafe { - dst_store.write_unaligned(oklab.l); - dst_store.add(1).write_unaligned(oklab.a); - dst_store.add(2).write_unaligned(oklab.b); - } + dst_slice_safe_align + .par_chunks_exact_mut(dst_stride as usize) + .zip(src.par_chunks_exact(src_stride as usize)) + .for_each(|(dst, src)| unsafe { + let mut _cx = 0usize; + + let src_ptr = src.as_ptr(); + let dst_ptr = dst.as_mut_ptr() as *mut f32; + + if let Some(dispatcher) = _wide_row_handle { + _cx = dispatcher(_cx, src.as_ptr(), 0, width, dst_ptr, 0, transfer_function) } - OklabTarget::Oklch => { - let oklch = Oklch::from_rgb(rgb, transfer_function); - 
unsafe { - dst_store.write_unaligned(oklch.l); - dst_store.add(1).write_unaligned(oklch.c); - dst_store.add(2).write_unaligned(oklch.h); + + for x in _cx..width as usize { + let px = x * channels; + + let src = src_ptr.add(px); + let r = src + .add(image_configuration.get_r_channel_offset()) + .read_unaligned(); + let g = src + .add(image_configuration.get_g_channel_offset()) + .read_unaligned(); + let b = src + .add(image_configuration.get_b_channel_offset()) + .read_unaligned(); + + let rgb = Rgb::::new(r, g, b); + let dst_store = dst_ptr.add(px); + + match target { + OklabTarget::Oklab => { + let oklab = Oklab::from_rgb(rgb, transfer_function); + dst_store.write_unaligned(oklab.l); + dst_store.add(1).write_unaligned(oklab.a); + dst_store.add(2).write_unaligned(oklab.b); + } + OklabTarget::Oklch => { + let oklch = Oklch::from_rgb(rgb, transfer_function); + dst_store.write_unaligned(oklch.l); + dst_store.add(1).write_unaligned(oklch.c); + dst_store.add(2).write_unaligned(oklch.h); + } } + + if image_configuration.has_alpha() { + let a = src + .add(image_configuration.get_a_channel_offset()) + .read_unaligned(); + let a_lin = a as f32 * (1f32 / 255f32); + dst_store.add(3).write_unaligned(a_lin); + } + } + }); + } + + #[cfg(not(feature = "rayon"))] + { + let mut src_offset = 0usize; + let mut dst_offset = 0usize; + + for _ in 0..height as usize { + let mut _cx = 0usize; + + let src_ptr = unsafe { src.as_ptr().add(src_offset) }; + let dst_ptr = unsafe { (dst.as_mut_ptr() as *mut u8).add(dst_offset) as *mut f32 }; + + if let Some(dispatcher) = _wide_row_handle { + unsafe { + _cx = dispatcher( + _cx, + src.as_ptr(), + src_offset, + width, + dst.as_mut_ptr(), + dst_offset, + transfer_function, + ) } } - if image_configuration.has_alpha() { - let a = unsafe { - src.add(image_configuration.get_a_channel_offset()) + for x in _cx..width as usize { + let px = x * channels; + + let src = unsafe { src_ptr.add(px) }; + let r = unsafe { + src.add(image_configuration.get_r_channel_offset()) .read_unaligned() }; - let a_lin = a as f32 * (1f32 / 255f32); - unsafe { - dst_store.add(3).write_unaligned(a_lin); + let g = unsafe { + src.add(image_configuration.get_g_channel_offset()) + .read_unaligned() + }; + let b = unsafe { + src.add(image_configuration.get_b_channel_offset()) + .read_unaligned() + }; + + let rgb = Rgb::::new(r, g, b); + let dst_store = unsafe { dst_ptr.add(px) }; + + match target { + OklabTarget::Oklab => { + let oklab = Oklab::from_rgb(rgb, transfer_function); + unsafe { + dst_store.write_unaligned(oklab.l); + dst_store.add(1).write_unaligned(oklab.a); + dst_store.add(2).write_unaligned(oklab.b); + } + } + OklabTarget::Oklch => { + let oklch = Oklch::from_rgb(rgb, transfer_function); + unsafe { + dst_store.write_unaligned(oklch.l); + dst_store.add(1).write_unaligned(oklch.c); + dst_store.add(2).write_unaligned(oklch.h); + } + } + } + + if image_configuration.has_alpha() { + let a = unsafe { + src.add(image_configuration.get_a_channel_offset()) + .read_unaligned() + }; + let a_lin = a as f32 * (1f32 / 255f32); + unsafe { + dst_store.add(3).write_unaligned(a_lin); + } } } - } - src_offset += src_stride as usize; - dst_offset += dst_stride as usize; + src_offset += src_stride as usize; + dst_offset += dst_stride as usize; + } } } diff --git a/src/image_to_sigmoidal.rs b/src/image_to_sigmoidal.rs index d71512c..af1899e 100644 --- a/src/image_to_sigmoidal.rs +++ b/src/image_to_sigmoidal.rs @@ -14,6 +14,12 @@ use crate::neon::neon_image_to_sigmoidal; #[cfg(any(target_arch = "x86_64", target_arch 
= "x86"))] use crate::sse::sse_image_to_sigmoidal_row; use crate::Rgb; +#[cfg(feature = "rayon")] +use rayon::iter::{IndexedParallelIterator, ParallelIterator}; +#[cfg(feature = "rayon")] +use rayon::prelude::{ParallelSlice, ParallelSliceMut}; +#[cfg(feature = "rayon")] +use std::slice; #[allow(clippy::type_complexity)] fn image_to_sigmoidal( @@ -29,9 +35,6 @@ fn image_to_sigmoidal( panic!("Alpha may be set only on images with alpha"); } - let mut src_offset = 0usize; - let mut dst_offset = 0usize; - let channels = image_configuration.get_channels_count(); let mut _wide_row_handler: Option usize> = None; @@ -53,58 +56,120 @@ fn image_to_sigmoidal( const COLOR_SCALE: f32 = 1f32 / 255f32; - for _ in 0..height as usize { - let mut _cx = 0usize; + #[cfg(feature = "rayon")] + { + let dst_slice_safe_align = unsafe { + slice::from_raw_parts_mut( + dst.as_mut_ptr() as *mut u8, + dst_stride as usize * height as usize, + ) + }; + + dst_slice_safe_align + .par_chunks_exact_mut(dst_stride as usize) + .zip(src.par_chunks_exact(src_stride as usize)) + .for_each(|(dst, src)| unsafe { + let mut _cx = 0usize; + + let src_ptr = src.as_ptr(); + let dst_ptr = dst.as_mut_ptr() as *mut f32; + + if let Some(dispatcher) = _wide_row_handler { + _cx = dispatcher(_cx, src_ptr, width, dst_ptr); + } - let src_ptr = unsafe { src.as_ptr().add(src_offset) }; - let dst_ptr = unsafe { (dst.as_mut_ptr() as *mut u8).add(dst_offset) as *mut f32 }; + for x in _cx..width as usize { + let px = x * channels; + let src = src_ptr.add(px); + let r = src + .add(image_configuration.get_r_channel_offset()) + .read_unaligned(); + let g = src + .add(image_configuration.get_g_channel_offset()) + .read_unaligned(); + let b = src + .add(image_configuration.get_b_channel_offset()) + .read_unaligned(); + + let rgb = Rgb::::new(r, g, b); + + let writing_ptr = dst_ptr.add(px); + + let sigmoidal = rgb.to_sigmoidal(); + writing_ptr.write_unaligned(sigmoidal.sr); + writing_ptr.add(1).write_unaligned(sigmoidal.sg); + writing_ptr.add(2).write_unaligned(sigmoidal.sb); + + if image_configuration.has_alpha() { + let a = src + .add(image_configuration.get_a_channel_offset()) + .read_unaligned() as f32 + * COLOR_SCALE; + + writing_ptr.add(3).write_unaligned(a); + } + } + }); + } - if let Some(dispatcher) = _wide_row_handler { - unsafe { _cx = dispatcher(_cx, src_ptr, width, dst_ptr) } - } + #[cfg(not(feature = "rayon"))] + { + let mut src_offset = 0usize; + let mut dst_offset = 0usize; + + for _ in 0..height as usize { + let mut _cx = 0usize; + + let src_ptr = unsafe { src.as_ptr().add(src_offset) }; + let dst_ptr = unsafe { (dst.as_mut_ptr() as *mut u8).add(dst_offset) as *mut f32 }; - for x in _cx..width as usize { - let px = x * channels; - let src = unsafe { src_ptr.add(px) }; - let r = unsafe { - src.add(image_configuration.get_r_channel_offset()) - .read_unaligned() - }; - let g = unsafe { - src.add(image_configuration.get_g_channel_offset()) - .read_unaligned() - }; - let b = unsafe { - src.add(image_configuration.get_b_channel_offset()) - .read_unaligned() - }; - - let rgb = Rgb::::new(r, g, b); - - let writing_ptr = unsafe { dst_ptr.add(px) }; - - let sigmoidal = rgb.to_sigmoidal(); - unsafe { - writing_ptr.write_unaligned(sigmoidal.sr); - writing_ptr.add(1).write_unaligned(sigmoidal.sg); - writing_ptr.add(2).write_unaligned(sigmoidal.sb); + if let Some(dispatcher) = _wide_row_handler { + unsafe { _cx = dispatcher(_cx, src_ptr, width, dst_ptr) } } - if image_configuration.has_alpha() { - let a = unsafe { - 
src.add(image_configuration.get_a_channel_offset()) + for x in _cx..width as usize { + let px = x * channels; + let src = unsafe { src_ptr.add(px) }; + let r = unsafe { + src.add(image_configuration.get_r_channel_offset()) .read_unaligned() - } as f32 - * COLOR_SCALE; + }; + let g = unsafe { + src.add(image_configuration.get_g_channel_offset()) + .read_unaligned() + }; + let b = unsafe { + src.add(image_configuration.get_b_channel_offset()) + .read_unaligned() + }; + + let rgb = Rgb::::new(r, g, b); + let writing_ptr = unsafe { dst_ptr.add(px) }; + + let sigmoidal = rgb.to_sigmoidal(); unsafe { - writing_ptr.add(3).write_unaligned(a); + writing_ptr.write_unaligned(sigmoidal.sr); + writing_ptr.add(1).write_unaligned(sigmoidal.sg); + writing_ptr.add(2).write_unaligned(sigmoidal.sb); + } + + if image_configuration.has_alpha() { + let a = unsafe { + src.add(image_configuration.get_a_channel_offset()) + .read_unaligned() + } as f32 + * COLOR_SCALE; + + unsafe { + writing_ptr.add(3).write_unaligned(a); + } } } - } - src_offset += src_stride as usize; - dst_offset += dst_stride as usize; + src_offset += src_stride as usize; + dst_offset += dst_stride as usize; + } } } diff --git a/src/image_to_xyz_lab.rs b/src/image_to_xyz_lab.rs index d5f3f70..39d0368 100644 --- a/src/image_to_xyz_lab.rs +++ b/src/image_to_xyz_lab.rs @@ -4,7 +4,6 @@ * // Use of this source code is governed by a BSD-style * // license that can be found in the LICENSE file. */ - #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] use crate::avx::avx2_image_to_xyz_lab; use crate::gamma_curves::TransferFunction; @@ -15,6 +14,12 @@ use crate::neon::neon_channels_to_xyz_or_lab; use crate::sse::sse_channels_to_xyz_or_lab; use crate::xyz_target::XyzTarget; use crate::{Rgb, Xyz, SRGB_TO_XYZ_D65}; +#[cfg(feature = "rayon")] +use rayon::iter::{IndexedParallelIterator, ParallelIterator}; +#[cfg(feature = "rayon")] +use rayon::prelude::{ParallelSlice, ParallelSliceMut}; +#[cfg(feature = "rayon")] +use std::slice; #[allow(clippy::type_complexity)] fn channels_to_xyz( @@ -35,10 +40,6 @@ fn channels_to_xyz); } - for _ in 0..height as usize { - let mut _cx = 0usize; - - if let Some(dispatcher) = _wide_row_handler { - unsafe { - _cx = dispatcher( - _cx, - src.as_ptr(), - src_offset, - width, - dst.as_mut_ptr(), - dst_offset, - a_channel.as_mut_ptr(), - a_offset, - matrix, - transfer_function, - ); - } - } - - let src_ptr = unsafe { src.as_ptr().add(src_offset) }; - let dst_ptr = unsafe { (dst.as_mut_ptr() as *mut u8).add(dst_offset) as *mut f32 }; + #[cfg(feature = "rayon")] + { + let dst_slice_safe_align = unsafe { + slice::from_raw_parts_mut( + dst.as_mut_ptr() as *mut u8, + dst_stride as usize * height as usize, + ) + }; - for x in _cx..width as usize { - let px = x * channels; - let src = unsafe { src_ptr.add(px) }; - let r = unsafe { - src.add(image_configuration.get_r_channel_offset()) - .read_unaligned() - }; - let g = unsafe { - src.add(image_configuration.get_g_channel_offset()) - .read_unaligned() - }; - let b = unsafe { - src.add(image_configuration.get_b_channel_offset()) - .read_unaligned() + if USE_ALPHA { + let a_slice_safe_align = unsafe { + slice::from_raw_parts_mut( + a_channel.as_mut_ptr() as *mut u8, + a_stride as usize * height as usize, + ) }; - let rgb = Rgb::::new(r, g, b); - let ptr = unsafe { dst_ptr.add(x * 3) }; - match target { - XyzTarget::Lab => { - let lab = rgb.to_lab(); - unsafe { - ptr.write_unaligned(lab.l); - ptr.add(1).write_unaligned(lab.a); - ptr.add(2).write_unaligned(lab.b); + dst_slice_safe_align + 
.par_chunks_exact_mut(dst_stride as usize) + .zip(src.par_chunks_exact(src_stride as usize)) + .zip(a_slice_safe_align.par_chunks_exact_mut(a_stride as usize)) + .for_each(|((dst, src), a_channel)| unsafe { + let mut _cx = 0usize; + + if let Some(dispatcher) = _wide_row_handler { + _cx = dispatcher( + _cx, + src.as_ptr(), + 0, + width, + dst.as_mut_ptr() as *mut f32, + 0, + a_channel.as_mut_ptr() as *mut f32, + 0, + matrix, + transfer_function, + ); } - } - XyzTarget::Xyz => { - let xyz = Xyz::from_rgb(rgb, matrix, transfer_function); - unsafe { - ptr.write_unaligned(xyz.x); - ptr.add(1).write_unaligned(xyz.y); - ptr.add(2).write_unaligned(xyz.z); + + let src_ptr = src.as_ptr().add(0); + let dst_ptr = dst.as_mut_ptr().add(0) as *mut f32; + + for x in _cx..width as usize { + let px = x * channels; + let src = src_ptr.add(px); + let r = src + .add(image_configuration.get_r_channel_offset()) + .read_unaligned(); + let g = src + .add(image_configuration.get_g_channel_offset()) + .read_unaligned(); + let b = src + .add(image_configuration.get_b_channel_offset()) + .read_unaligned(); + + let rgb = Rgb::::new(r, g, b); + let ptr = dst_ptr.add(x * 3); + match target { + XyzTarget::Lab => { + let lab = rgb.to_lab(); + ptr.write_unaligned(lab.l); + ptr.add(1).write_unaligned(lab.a); + ptr.add(2).write_unaligned(lab.b); + } + XyzTarget::Xyz => { + let xyz = Xyz::from_rgb(rgb, matrix, transfer_function); + ptr.write_unaligned(xyz.x); + ptr.add(1).write_unaligned(xyz.y); + ptr.add(2).write_unaligned(xyz.z); + } + XyzTarget::Luv => { + let luv = rgb.to_luv(); + ptr.write_unaligned(luv.l); + ptr.add(1).write_unaligned(luv.u); + ptr.add(2).write_unaligned(luv.v); + } + XyzTarget::Lch => { + let lch = rgb.to_lch(); + ptr.write_unaligned(lch.l); + ptr.add(1).write_unaligned(lch.c); + ptr.add(2).write_unaligned(lch.h); + } + } + + if USE_ALPHA && image_configuration.has_alpha() { + let a = src + .add(image_configuration.get_a_channel_offset()) + .read_unaligned(); + let a_lin = a as f32 * (1f32 / 255f32); + let a_ptr = a_channel.as_mut_ptr() as *mut f32; + a_ptr.add(x).write_unaligned(a_lin); + } } - } - XyzTarget::Luv => { - let luv = rgb.to_luv(); - unsafe { - ptr.write_unaligned(luv.l); - ptr.add(1).write_unaligned(luv.u); - ptr.add(2).write_unaligned(luv.v); + }); + } else { + dst_slice_safe_align + .par_chunks_exact_mut(dst_stride as usize) + .zip(src.par_chunks_exact(src_stride as usize)) + .for_each(|(dst, src)| unsafe { + let mut _cx = 0usize; + + if let Some(dispatcher) = _wide_row_handler { + _cx = dispatcher( + _cx, + src.as_ptr(), + 0, + width, + dst.as_mut_ptr() as *mut f32, + 0, + std::ptr::null_mut(), + 0, + matrix, + transfer_function, + ); } - } - XyzTarget::Lch => { - let lch = rgb.to_lch(); - unsafe { - ptr.write_unaligned(lch.l); - ptr.add(1).write_unaligned(lch.c); - ptr.add(2).write_unaligned(lch.h); + + let src_ptr = src.as_ptr().add(0); + let dst_ptr = dst.as_mut_ptr().add(0) as *mut f32; + + for x in _cx..width as usize { + let px = x * channels; + let src = src_ptr.add(px); + let r = src + .add(image_configuration.get_r_channel_offset()) + .read_unaligned(); + let g = src + .add(image_configuration.get_g_channel_offset()) + .read_unaligned(); + let b = src + .add(image_configuration.get_b_channel_offset()) + .read_unaligned(); + + let rgb = Rgb::::new(r, g, b); + let ptr = dst_ptr.add(x * 3); + match target { + XyzTarget::Lab => { + let lab = rgb.to_lab(); + ptr.write_unaligned(lab.l); + ptr.add(1).write_unaligned(lab.a); + ptr.add(2).write_unaligned(lab.b); + } + XyzTarget::Xyz => 
{ + let xyz = Xyz::from_rgb(rgb, matrix, transfer_function); + ptr.write_unaligned(xyz.x); + ptr.add(1).write_unaligned(xyz.y); + ptr.add(2).write_unaligned(xyz.z); + } + XyzTarget::Luv => { + let luv = rgb.to_luv(); + ptr.write_unaligned(luv.l); + ptr.add(1).write_unaligned(luv.u); + ptr.add(2).write_unaligned(luv.v); + } + XyzTarget::Lch => { + let lch = rgb.to_lch(); + ptr.write_unaligned(lch.l); + ptr.add(1).write_unaligned(lch.c); + ptr.add(2).write_unaligned(lch.h); + } + } } + }); + } + } + + #[cfg(not(feature = "rayon"))] + { + let mut src_offset = 0usize; + let mut dst_offset = 0usize; + let mut a_offset = 0usize; + + for _ in 0..height as usize { + let mut _cx = 0usize; + + if let Some(dispatcher) = _wide_row_handler { + unsafe { + _cx = dispatcher( + _cx, + src.as_ptr(), + src_offset, + width, + dst.as_mut_ptr(), + dst_offset, + a_channel.as_mut_ptr(), + a_offset, + matrix, + transfer_function, + ); } } - if USE_ALPHA && image_configuration.has_alpha() { - let a = unsafe { - src.add(image_configuration.get_a_channel_offset()) + let src_ptr = unsafe { src.as_ptr().add(src_offset) }; + let dst_ptr = unsafe { (dst.as_mut_ptr() as *mut u8).add(dst_offset) as *mut f32 }; + + for x in _cx..width as usize { + let px = x * channels; + let src = unsafe { src_ptr.add(px) }; + let r = unsafe { + src.add(image_configuration.get_r_channel_offset()) .read_unaligned() }; - let a_lin = a as f32 * (1f32 / 255f32); - let a_ptr = - unsafe { (a_channel.as_mut_ptr() as *mut u8).add(a_offset) as *mut f32 }; - unsafe { - a_ptr.add(x).write_unaligned(a_lin); + let g = unsafe { + src.add(image_configuration.get_g_channel_offset()) + .read_unaligned() + }; + let b = unsafe { + src.add(image_configuration.get_b_channel_offset()) + .read_unaligned() + }; + + let rgb = Rgb::::new(r, g, b); + let ptr = unsafe { dst_ptr.add(x * 3) }; + match target { + XyzTarget::Lab => { + let lab = rgb.to_lab(); + unsafe { + ptr.write_unaligned(lab.l); + ptr.add(1).write_unaligned(lab.a); + ptr.add(2).write_unaligned(lab.b); + } + } + XyzTarget::Xyz => { + let xyz = Xyz::from_rgb(rgb, matrix, transfer_function); + unsafe { + ptr.write_unaligned(xyz.x); + ptr.add(1).write_unaligned(xyz.y); + ptr.add(2).write_unaligned(xyz.z); + } + } + XyzTarget::Luv => { + let luv = rgb.to_luv(); + unsafe { + ptr.write_unaligned(luv.l); + ptr.add(1).write_unaligned(luv.u); + ptr.add(2).write_unaligned(luv.v); + } + } + XyzTarget::Lch => { + let lch = rgb.to_lch(); + unsafe { + ptr.write_unaligned(lch.l); + ptr.add(1).write_unaligned(lch.c); + ptr.add(2).write_unaligned(lch.h); + } + } + } + + if USE_ALPHA && image_configuration.has_alpha() { + let a = unsafe { + src.add(image_configuration.get_a_channel_offset()) + .read_unaligned() + }; + let a_lin = a as f32 * (1f32 / 255f32); + let a_ptr = + unsafe { (a_channel.as_mut_ptr() as *mut u8).add(a_offset) as *mut f32 }; + unsafe { + a_ptr.add(x).write_unaligned(a_lin); + } } } - } - src_offset += src_stride as usize; - dst_offset += dst_stride as usize; - a_offset += a_stride as usize; + src_offset += src_stride as usize; + dst_offset += dst_stride as usize; + a_offset += a_stride as usize; + } } } diff --git a/src/image_xyza_laba.rs b/src/image_xyza_laba.rs index 1e189fc..1e97acb 100644 --- a/src/image_xyza_laba.rs +++ b/src/image_xyza_laba.rs @@ -12,6 +12,12 @@ use crate::neon::neon_channels_to_xyza_or_laba; use crate::sse::sse_channels_to_xyza_laba; use crate::xyz_target::XyzTarget; use crate::{Rgb, TransferFunction, Xyz, SRGB_TO_XYZ_D65}; +#[cfg(feature = "rayon")] +use 
rayon::iter::{IndexedParallelIterator, ParallelIterator}; +#[cfg(feature = "rayon")] +use rayon::prelude::{ParallelSlice, ParallelSliceMut}; +#[cfg(feature = "rayon")] +use std::slice; #[allow(clippy::type_complexity)] fn channels_to_xyz_with_alpha( @@ -30,9 +36,6 @@ fn channels_to_xyz_with_alpha::new(r, g, b); - let px = x * channels; - let dst_store = unsafe { dst_ptr.add(px) }; - match target { - XyzTarget::Lab => { - let lab = rgb.to_lab(); - unsafe { - dst_store.write_unaligned(lab.l); - dst_store.add(1).write_unaligned(lab.a); - dst_store.add(2).write_unaligned(lab.b); + let rgb = Rgb::::new(r, g, b); + let px = x * channels; + let dst_store = dst_ptr.add(px); + match target { + XyzTarget::Lab => { + let lab = rgb.to_lab(); + dst_store.write_unaligned(lab.l); + dst_store.add(1).write_unaligned(lab.a); + dst_store.add(2).write_unaligned(lab.b); + } + XyzTarget::Xyz => { + let xyz = Xyz::from_rgb(rgb, matrix, transfer_function); + dst_store.write_unaligned(xyz.x); + dst_store.add(1).write_unaligned(xyz.y); + dst_store.add(2).write_unaligned(xyz.z); + } + XyzTarget::Luv => { + let luv = rgb.to_luv(); + dst_store.write_unaligned(luv.l); + dst_store.add(1).write_unaligned(luv.u); + dst_store.add(2).write_unaligned(luv.v); + } + XyzTarget::Lch => { + let lch = rgb.to_lch(); + dst_store.write_unaligned(lch.l); + dst_store.add(1).write_unaligned(lch.c); + dst_store.add(2).write_unaligned(lch.h); + } } + + let a = src + .add(image_configuration.get_a_channel_offset()) + .read_unaligned(); + let a_lin = a as f32 * (1f32 / 255f32); + dst_store.add(3).write_unaligned(a_lin); } - XyzTarget::Xyz => { - let xyz = Xyz::from_rgb(rgb, matrix, transfer_function); - unsafe { - dst_store.write_unaligned(xyz.x); - dst_store.add(1).write_unaligned(xyz.y); - dst_store.add(2).write_unaligned(xyz.z); - } + }); + } + + #[cfg(not(feature = "rayon"))] + { + let mut src_offset = 0usize; + let mut dst_offset = 0usize; + + for _ in 0..height as usize { + let mut _cx = 0usize; + + if let Some(dispatcher) = _wide_row_handler { + unsafe { + _cx = dispatcher( + _cx, + src.as_ptr(), + src_offset, + width, + dst.as_mut_ptr(), + dst_offset, + matrix, + transfer_function, + ); } - XyzTarget::Luv => { - let luv = rgb.to_luv(); - unsafe { - dst_store.write_unaligned(luv.l); - dst_store.add(1).write_unaligned(luv.u); - dst_store.add(2).write_unaligned(luv.v); + } + + let src_ptr = unsafe { src.as_ptr().add(src_offset) }; + let dst_ptr = unsafe { (dst.as_mut_ptr() as *mut u8).add(dst_offset) as *mut f32 }; + + for x in _cx..width as usize { + let px = x * channels; + let src = unsafe { src_ptr.add(px) }; + let r = unsafe { + src.add(image_configuration.get_r_channel_offset()) + .read_unaligned() + }; + let g = unsafe { + src.add(image_configuration.get_g_channel_offset()) + .read_unaligned() + }; + let b = unsafe { + src.add(image_configuration.get_b_channel_offset()) + .read_unaligned() + }; + + let rgb = Rgb::::new(r, g, b); + let px = x * channels; + let dst_store = unsafe { dst_ptr.add(px) }; + match target { + XyzTarget::Lab => { + let lab = rgb.to_lab(); + unsafe { + dst_store.write_unaligned(lab.l); + dst_store.add(1).write_unaligned(lab.a); + dst_store.add(2).write_unaligned(lab.b); + } } - } - XyzTarget::Lch => { - let lch = rgb.to_lch(); - unsafe { - dst_store.write_unaligned(lch.l); - dst_store.add(1).write_unaligned(lch.c); - dst_store.add(2).write_unaligned(lch.h); + XyzTarget::Xyz => { + let xyz = Xyz::from_rgb(rgb, matrix, transfer_function); + unsafe { + dst_store.write_unaligned(xyz.x); + 
dst_store.add(1).write_unaligned(xyz.y); + dst_store.add(2).write_unaligned(xyz.z); + } + } + XyzTarget::Luv => { + let luv = rgb.to_luv(); + unsafe { + dst_store.write_unaligned(luv.l); + dst_store.add(1).write_unaligned(luv.u); + dst_store.add(2).write_unaligned(luv.v); + } + } + XyzTarget::Lch => { + let lch = rgb.to_lch(); + unsafe { + dst_store.write_unaligned(lch.l); + dst_store.add(1).write_unaligned(lch.c); + dst_store.add(2).write_unaligned(lch.h); + } } } - } - let a = unsafe { - src.add(image_configuration.get_a_channel_offset()) - .read_unaligned() - }; - let a_lin = a as f32 * (1f32 / 255f32); - unsafe { - dst_store.add(3).write_unaligned(a_lin); + let a = unsafe { + src.add(image_configuration.get_a_channel_offset()) + .read_unaligned() + }; + let a_lin = a as f32 * (1f32 / 255f32); + unsafe { + dst_store.add(3).write_unaligned(a_lin); + } } - } - src_offset += src_stride as usize; - dst_offset += dst_stride as usize; + src_offset += src_stride as usize; + dst_offset += dst_stride as usize; + } } } diff --git a/src/jzazbz_to_image.rs b/src/jzazbz_to_image.rs index 9a8cdd5..ec6657a 100644 --- a/src/jzazbz_to_image.rs +++ b/src/jzazbz_to_image.rs @@ -11,6 +11,12 @@ use crate::neon::neon_jzazbz_to_image; #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] use crate::sse::sse_jzazbz_to_image; use crate::{Jzazbz, Jzczhz, TransferFunction}; +#[cfg(feature = "rayon")] +use rayon::iter::{IndexedParallelIterator, ParallelIterator}; +#[cfg(feature = "rayon")] +use rayon::prelude::{ParallelSlice, ParallelSliceMut}; +#[cfg(feature = "rayon")] +use std::slice; #[allow(clippy::type_complexity)] fn jzazbz_to_image( @@ -40,67 +46,135 @@ fn jzazbz_to_image( _wide_row_handle = Some(neon_jzazbz_to_image::); } - let mut src_offset = 0usize; - let mut dst_offset = 0usize; + #[cfg(feature = "rayon")] + { + let src_slice_safe_align = unsafe { + slice::from_raw_parts( + src.as_ptr() as *const u8, + src_stride as usize * height as usize, + ) + }; + dst.par_chunks_exact_mut(dst_stride as usize) + .zip(src_slice_safe_align.par_chunks_exact(src_stride as usize)) + .for_each(|(dst, src)| unsafe { + let channels = image_configuration.get_channels_count(); - let channels = image_configuration.get_channels_count(); + let mut _cx = 0usize; - for _ in 0..height as usize { - let mut _cx = 0usize; + let src_ptr = src.as_ptr() as *mut f32; + let dst_ptr = dst.as_mut_ptr(); - let src_ptr = unsafe { (src.as_ptr() as *const u8).add(src_offset) as *mut f32 }; - let dst_ptr = unsafe { dst.as_mut_ptr().add(dst_offset) }; + if let Some(dispatcher) = _wide_row_handle { + _cx = dispatcher( + _cx, + src.as_ptr() as *const f32, + 0, + dst.as_mut_ptr(), + 0, + width, + display_luminance, + transfer_function, + ); + } - if let Some(dispatcher) = _wide_row_handle { - unsafe { - _cx = dispatcher( - _cx, - src.as_ptr(), - src_offset as u32, - dst.as_mut_ptr(), - dst_offset as u32, - width, - display_luminance, - transfer_function, - ); - } - } + for x in _cx..width as usize { + let px = x * channels; + let l_x = src_ptr.add(px).read_unaligned(); + let l_y = src_ptr.add(px + 1).read_unaligned(); + let l_z = src_ptr.add(px + 2).read_unaligned(); + let rgb = match target { + JzazbzTarget::Jzazbz => { + let jzazbz = + Jzazbz::new_with_luminance(l_x, l_y, l_z, display_luminance); + jzazbz.to_rgb(transfer_function) + } + JzazbzTarget::Jzczhz => { + let jzczhz = Jzczhz::new(l_x, l_y, l_z); + jzczhz.to_rgb_with_luminance(display_luminance, transfer_function) + } + }; - for x in _cx..width as usize { - let px = x * channels; - let 
l_x = unsafe { src_ptr.add(px).read_unaligned() }; - let l_y = unsafe { src_ptr.add(px + 1).read_unaligned() }; - let l_z = unsafe { src_ptr.add(px + 2).read_unaligned() }; - let rgb = match target { - JzazbzTarget::Jzazbz => { - let jzazbz = Jzazbz::new_with_luminance(l_x, l_y, l_z, display_luminance); - jzazbz.to_rgb(transfer_function) + let dst = dst_ptr.add(x * channels); + dst.add(image_configuration.get_r_channel_offset()) + .write_unaligned(rgb.r); + dst.add(image_configuration.get_g_channel_offset()) + .write_unaligned(rgb.g); + dst.add(image_configuration.get_b_channel_offset()) + .write_unaligned(rgb.b); + if image_configuration.has_alpha() { + let l_a = src_ptr.add(px + 3).read_unaligned(); + let a_value = (l_a * 255f32).max(0f32); + dst.add(image_configuration.get_a_channel_offset()) + .write_unaligned(a_value as u8); + } } - JzazbzTarget::Jzczhz => { - let jzczhz = Jzczhz::new(l_x, l_y, l_z); - jzczhz.to_rgb_with_luminance(display_luminance, transfer_function) + }); + } + + #[cfg(not(feature = "rayon"))] + { + let mut src_offset = 0usize; + let mut dst_offset = 0usize; + + let channels = image_configuration.get_channels_count(); + + for _ in 0..height as usize { + let mut _cx = 0usize; + + let src_ptr = unsafe { (src.as_ptr() as *const u8).add(src_offset) as *mut f32 }; + let dst_ptr = unsafe { dst.as_mut_ptr().add(dst_offset) }; + + if let Some(dispatcher) = _wide_row_handle { + unsafe { + _cx = dispatcher( + _cx, + src.as_ptr(), + src_offset as u32, + dst.as_mut_ptr(), + dst_offset as u32, + width, + display_luminance, + transfer_function, + ); } - }; + } - unsafe { - let dst = dst_ptr.add(x * channels); - dst.add(image_configuration.get_r_channel_offset()) - .write_unaligned(rgb.r); - dst.add(image_configuration.get_g_channel_offset()) - .write_unaligned(rgb.g); - dst.add(image_configuration.get_b_channel_offset()) - .write_unaligned(rgb.b); - if image_configuration.has_alpha() { - let l_a = src_ptr.add(px + 3).read_unaligned(); - let a_value = (l_a * 255f32).max(0f32); - dst.add(image_configuration.get_a_channel_offset()) - .write_unaligned(a_value as u8); + for x in _cx..width as usize { + let px = x * channels; + let l_x = unsafe { src_ptr.add(px).read_unaligned() }; + let l_y = unsafe { src_ptr.add(px + 1).read_unaligned() }; + let l_z = unsafe { src_ptr.add(px + 2).read_unaligned() }; + let rgb = match target { + JzazbzTarget::Jzazbz => { + let jzazbz = Jzazbz::new_with_luminance(l_x, l_y, l_z, display_luminance); + jzazbz.to_rgb(transfer_function) + } + JzazbzTarget::Jzczhz => { + let jzczhz = Jzczhz::new(l_x, l_y, l_z); + jzczhz.to_rgb_with_luminance(display_luminance, transfer_function) + } + }; + + unsafe { + let dst = dst_ptr.add(x * channels); + dst.add(image_configuration.get_r_channel_offset()) + .write_unaligned(rgb.r); + dst.add(image_configuration.get_g_channel_offset()) + .write_unaligned(rgb.g); + dst.add(image_configuration.get_b_channel_offset()) + .write_unaligned(rgb.b); + if image_configuration.has_alpha() { + let l_a = src_ptr.add(px + 3).read_unaligned(); + let a_value = (l_a * 255f32).max(0f32); + dst.add(image_configuration.get_a_channel_offset()) + .write_unaligned(a_value as u8); + } } } - } - src_offset += src_stride as usize; - dst_offset += dst_stride as usize; + src_offset += src_stride as usize; + dst_offset += dst_stride as usize; + } } } diff --git a/src/lalphabeta_to_image.rs b/src/lalphabeta_to_image.rs index 0623232..744e15c 100644 --- a/src/lalphabeta_to_image.rs +++ b/src/lalphabeta_to_image.rs @@ -6,6 +6,12 @@ */ use 
crate::image::ImageConfiguration; use crate::{LAlphaBeta, TransferFunction}; +#[cfg(feature = "rayon")] +use rayon::iter::{IndexedParallelIterator, ParallelIterator}; +#[cfg(feature = "rayon")] +use rayon::prelude::{ParallelSlice, ParallelSliceMut}; +#[cfg(feature = "rayon")] +use std::slice; fn lalphabeta_to_image( src: &[f32], @@ -18,44 +24,88 @@ fn lalphabeta_to_image( ) { let image_configuration: ImageConfiguration = CHANNELS_CONFIGURATION.into(); - let mut src_offset = 0usize; - let mut dst_offset = 0usize; - let channels = image_configuration.get_channels_count(); - for _ in 0..height as usize { - let mut _cx = 0usize; - - let src_ptr = unsafe { (src.as_ptr() as *const u8).add(src_offset) as *mut f32 }; - let dst_ptr = unsafe { dst.as_mut_ptr().add(dst_offset) }; - - for x in _cx..width as usize { - let px = x * channels; - let l_x = unsafe { src_ptr.add(px).read_unaligned() }; - let l_y = unsafe { src_ptr.add(px + 1).read_unaligned() }; - let l_z = unsafe { src_ptr.add(px + 2).read_unaligned() }; - let lalphabeta = LAlphaBeta::new(l_x, l_y, l_z); - let rgb = lalphabeta.to_rgb(transfer_function); - - unsafe { - let dst = dst_ptr.add(x * channels); - dst.add(image_configuration.get_r_channel_offset()) - .write_unaligned(rgb.r); - dst.add(image_configuration.get_g_channel_offset()) - .write_unaligned(rgb.g); - dst.add(image_configuration.get_b_channel_offset()) - .write_unaligned(rgb.b); - if image_configuration.has_alpha() { - let l_a = src_ptr.add(px + 3).read_unaligned(); - let a_value = (l_a * 255f32).max(0f32); - dst.add(image_configuration.get_a_channel_offset()) - .write_unaligned(a_value as u8); + #[cfg(feature = "rayon")] + { + let src_slice_safe_align = unsafe { + slice::from_raw_parts( + src.as_ptr() as *const u8, + src_stride as usize * height as usize, + ) + }; + dst.par_chunks_exact_mut(dst_stride as usize) + .zip(src_slice_safe_align.par_chunks_exact(src_stride as usize)) + .for_each(|(dst, src)| unsafe { + let mut _cx = 0usize; + + let src_ptr = src.as_ptr() as *mut f32; + let dst_ptr = dst.as_mut_ptr(); + + for x in _cx..width as usize { + let px = x * channels; + let l_x = src_ptr.add(px).read_unaligned(); + let l_y = src_ptr.add(px + 1).read_unaligned(); + let l_z = src_ptr.add(px + 2).read_unaligned(); + let lalphabeta = LAlphaBeta::new(l_x, l_y, l_z); + let rgb = lalphabeta.to_rgb(transfer_function); + + let dst = dst_ptr.add(x * channels); + dst.add(image_configuration.get_r_channel_offset()) + .write_unaligned(rgb.r); + dst.add(image_configuration.get_g_channel_offset()) + .write_unaligned(rgb.g); + dst.add(image_configuration.get_b_channel_offset()) + .write_unaligned(rgb.b); + if image_configuration.has_alpha() { + let l_a = src_ptr.add(px + 3).read_unaligned(); + let a_value = (l_a * 255f32).max(0f32); + dst.add(image_configuration.get_a_channel_offset()) + .write_unaligned(a_value as u8); + } + } + }); + } + + #[cfg(not(feature = "rayon"))] + { + let mut src_offset = 0usize; + let mut dst_offset = 0usize; + + for _ in 0..height as usize { + let mut _cx = 0usize; + + let src_ptr = unsafe { (src.as_ptr() as *const u8).add(src_offset) as *mut f32 }; + let dst_ptr = unsafe { dst.as_mut_ptr().add(dst_offset) }; + + for x in _cx..width as usize { + let px = x * channels; + let l_x = unsafe { src_ptr.add(px).read_unaligned() }; + let l_y = unsafe { src_ptr.add(px + 1).read_unaligned() }; + let l_z = unsafe { src_ptr.add(px + 2).read_unaligned() }; + let lalphabeta = LAlphaBeta::new(l_x, l_y, l_z); + let rgb = lalphabeta.to_rgb(transfer_function); + + unsafe { + let 
dst = dst_ptr.add(x * channels); + dst.add(image_configuration.get_r_channel_offset()) + .write_unaligned(rgb.r); + dst.add(image_configuration.get_g_channel_offset()) + .write_unaligned(rgb.g); + dst.add(image_configuration.get_b_channel_offset()) + .write_unaligned(rgb.b); + if image_configuration.has_alpha() { + let l_a = src_ptr.add(px + 3).read_unaligned(); + let a_value = (l_a * 255f32).max(0f32); + dst.add(image_configuration.get_a_channel_offset()) + .write_unaligned(a_value as u8); + } } } - } - src_offset += src_stride as usize; - dst_offset += dst_stride as usize; + src_offset += src_stride as usize; + dst_offset += dst_stride as usize; + } } } diff --git a/src/linear_to_image.rs b/src/linear_to_image.rs index 6ec6e69..e74a35d 100644 --- a/src/linear_to_image.rs +++ b/src/linear_to_image.rs @@ -4,7 +4,6 @@ * // Use of this source code is governed by a BSD-style * // license that can be found in the LICENSE file. */ - #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] use crate::avx::avx_linear_to_gamma; use crate::gamma_curves::TransferFunction; @@ -14,6 +13,12 @@ use crate::neon::neon_linear_to_gamma; #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] use crate::sse::sse_linear_to_gamma; use crate::Rgb; +#[cfg(feature = "rayon")] +use rayon::iter::{IndexedParallelIterator, ParallelIterator}; +#[cfg(feature = "rayon")] +use rayon::prelude::{ParallelSlice, ParallelSliceMut}; +#[cfg(feature = "rayon")] +use std::slice; #[allow(clippy::type_complexity)] fn linear_to_gamma_channels( @@ -39,9 +44,6 @@ fn linear_to_gamma_channels); } - let mut src_offset = 0usize; - let mut dst_offset = 0usize; - let channels = image_configuration.get_channels_count(); #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] @@ -54,76 +56,148 @@ fn linear_to_gamma_channels); } - for _ in 0..height as usize { - let mut _cx = 0usize; - - if let Some(dispatcher) = _wide_row_handle { - unsafe { - _cx = dispatcher( - _cx, - src.as_ptr(), - src_offset as u32, - dst.as_mut_ptr(), - dst_offset as u32, - width, - transfer_function, - ); - } - } + #[cfg(feature = "rayon")] + { + let src_slice_safe_align = unsafe { + slice::from_raw_parts( + src.as_ptr() as *const u8, + src_stride as usize * height as usize, + ) + }; + dst.par_chunks_exact_mut(dst_stride as usize) + .zip(src_slice_safe_align.par_chunks_exact(src_stride as usize)) + .for_each(|(dst, src)| unsafe { + let mut _cx = 0usize; + + if let Some(dispatcher) = _wide_row_handle { + _cx = dispatcher( + _cx, + src.as_ptr() as *const f32, + 0, + dst.as_mut_ptr(), + 0, + width, + transfer_function, + ); + } + + let src_ptr = src.as_ptr() as *const f32; + let dst_ptr = dst.as_mut_ptr(); + + for x in _cx..width as usize { + let px = x * channels; + let src_slice = src_ptr.add(px); + let r = src_slice + .add(image_configuration.get_r_channel_offset()) + .read_unaligned(); + let g = src_slice + .add(image_configuration.get_g_channel_offset()) + .read_unaligned(); + let b = src_slice + .add(image_configuration.get_b_channel_offset()) + .read_unaligned(); + + let rgb = Rgb::::new( + r.min(1f32).max(0f32), + g.min(1f32).max(0f32), + b.min(1f32).max(0f32), + ); + + let dst = dst_ptr.add(px); + let transferred = rgb.gamma(transfer_function); + let rgb8 = transferred.to_u8(); + + dst.write_unaligned(rgb8.r); + dst.add(1).write_unaligned(rgb8.g); + dst.add(2).write_unaligned(rgb8.b); + + if USE_ALPHA && image_configuration.has_alpha() { + let a = src_slice + .add(image_configuration.get_a_channel_offset()) + .read_unaligned(); + let a_lin = (a * 
255f32).round() as u8; + dst.add(3).write_unaligned(a_lin); + } + } + }); + } + + #[cfg(not(feature = "rayon"))] + { + let mut src_offset = 0usize; + let mut dst_offset = 0usize; - let src_ptr = unsafe { (src.as_ptr() as *const u8).add(src_offset) as *const f32 }; - let dst_ptr = unsafe { dst.as_mut_ptr().add(dst_offset) }; - - for x in _cx..width as usize { - let px = x * channels; - let src_slice = unsafe { src_ptr.add(px) }; - let r = unsafe { - src_slice - .add(image_configuration.get_r_channel_offset()) - .read_unaligned() - }; - let g = unsafe { - src_slice - .add(image_configuration.get_g_channel_offset()) - .read_unaligned() - }; - let b = unsafe { - src_slice - .add(image_configuration.get_b_channel_offset()) - .read_unaligned() - }; - - let rgb = Rgb::::new( - r.min(1f32).max(0f32), - g.min(1f32).max(0f32), - b.min(1f32).max(0f32), - ); - - let dst = unsafe { dst_ptr.add(px) }; - let transferred = rgb.gamma(transfer_function); - let rgb8 = transferred.to_u8(); - - unsafe { - dst.write_unaligned(rgb8.r); - dst.add(1).write_unaligned(rgb8.g); - dst.add(2).write_unaligned(rgb8.b); + for _ in 0..height as usize { + let mut _cx = 0usize; + + if let Some(dispatcher) = _wide_row_handle { + unsafe { + _cx = dispatcher( + _cx, + src.as_ptr(), + src_offset as u32, + dst.as_mut_ptr(), + dst_offset as u32, + width, + transfer_function, + ); + } } - if USE_ALPHA && image_configuration.has_alpha() { - let a = unsafe { + let src_ptr = unsafe { (src.as_ptr() as *const u8).add(src_offset) as *const f32 }; + let dst_ptr = unsafe { dst.as_mut_ptr().add(dst_offset) }; + + for x in _cx..width as usize { + let px = x * channels; + let src_slice = unsafe { src_ptr.add(px) }; + let r = unsafe { + src_slice + .add(image_configuration.get_r_channel_offset()) + .read_unaligned() + }; + let g = unsafe { + src_slice + .add(image_configuration.get_g_channel_offset()) + .read_unaligned() + }; + let b = unsafe { src_slice - .add(image_configuration.get_a_channel_offset()) + .add(image_configuration.get_b_channel_offset()) .read_unaligned() }; - let a_lin = (a * 255f32).round() as u8; + + let rgb = Rgb::::new( + r.min(1f32).max(0f32), + g.min(1f32).max(0f32), + b.min(1f32).max(0f32), + ); + + let dst = unsafe { dst_ptr.add(px) }; + let transferred = rgb.gamma(transfer_function); + let rgb8 = transferred.to_u8(); + unsafe { - dst.add(3).write_unaligned(a_lin); + dst.write_unaligned(rgb8.r); + dst.add(1).write_unaligned(rgb8.g); + dst.add(2).write_unaligned(rgb8.b); + } + + if USE_ALPHA && image_configuration.has_alpha() { + let a = unsafe { + src_slice + .add(image_configuration.get_a_channel_offset()) + .read_unaligned() + }; + let a_lin = (a * 255f32).round() as u8; + unsafe { + dst.add(3).write_unaligned(a_lin); + } } } - } - src_offset += src_stride as usize; - dst_offset += dst_stride as usize; + src_offset += src_stride as usize; + dst_offset += dst_stride as usize; + } } } diff --git a/src/linear_to_image_u8.rs b/src/linear_to_image_u8.rs index 158b789..b3cdd31 100644 --- a/src/linear_to_image_u8.rs +++ b/src/linear_to_image_u8.rs @@ -5,8 +5,6 @@ * // license that can be found in the LICENSE file. 
*/ -use std::slice; - use crate::gamma_curves::TransferFunction; use crate::image::ImageConfiguration; #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] @@ -14,6 +12,12 @@ use crate::neon::*; #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] use crate::sse::sse_image_to_linear_unsigned::sse_channels_to_linear_u8; use crate::Rgb; +#[cfg(feature = "rayon")] +use rayon::iter::{IndexedParallelIterator, ParallelIterator}; +#[cfg(feature = "rayon")] +use rayon::prelude::{ParallelSlice, ParallelSliceMut}; +#[cfg(not(feature = "rayon"))] +use std::slice; #[allow(clippy::type_complexity)] fn linear_to_gamma_channels( @@ -22,7 +26,7 @@ fn linear_to_gamma_channels); } - for _ in 0..height as usize { - let mut _cx = 0usize; - - if let Some(dispatcher) = _wide_row_handler { - unsafe { - _cx = dispatcher( - _cx, - src.as_ptr(), - src_offset, - width, - dst.as_mut_ptr(), - dst_offset, - transfer_function, - ); - } - } + #[cfg(feature = "rayon")] + { + dst.par_chunks_exact_mut(dst_stride as usize) + .zip(src.par_chunks_exact(src_stride as usize)) + .for_each(|(dst, src)| unsafe { + let mut _cx = 0usize; + + if let Some(dispatcher) = _wide_row_handler { + _cx = dispatcher( + _cx, + src.as_ptr(), + 0, + width, + dst.as_mut_ptr(), + 0, + transfer_function, + ); + } + + for x in _cx..width as usize { + let px = x * channels; + let r = *src.get_unchecked(px + image_configuration.get_r_channel_offset()); + let g = *src.get_unchecked(px + image_configuration.get_g_channel_offset()); + let b = *src.get_unchecked(px + image_configuration.get_b_channel_offset()); + + let rgb = Rgb::::new(r, g, b); + let mut rgb = rgb.to_rgb_f32(); + + rgb = rgb.gamma(transfer_function); + let new_rgb = rgb.to_u8(); + + *dst.get_unchecked_mut(px) = new_rgb.r; + *dst.get_unchecked_mut(px + 1) = new_rgb.g; + *dst.get_unchecked_mut(px + 2) = new_rgb.b; + + if USE_ALPHA && image_configuration.has_alpha() { + let a = src.get_unchecked(px + image_configuration.get_a_channel_offset()); + *dst.get_unchecked_mut(px + 3) = *a; + } + } + }); + } - let src_ptr = unsafe { src.as_ptr().add(src_offset) }; - let dst_ptr = unsafe { dst.as_mut_ptr().add(dst_offset) }; - - let src_slice = unsafe { slice::from_raw_parts(src_ptr, width as usize * channels) }; - let dst_slice = unsafe { slice::from_raw_parts_mut(dst_ptr, width as usize * channels) }; - - for x in _cx..width as usize { - let px = x * channels; - let r = unsafe { - *src_slice.get_unchecked(px + image_configuration.get_r_channel_offset()) - }; - let g = unsafe { - *src_slice.get_unchecked(px + image_configuration.get_g_channel_offset()) - }; - let b = unsafe { - *src_slice.get_unchecked(px + image_configuration.get_b_channel_offset()) - }; - - let rgb = Rgb::::new(r, g, b); - let mut rgb = rgb.to_rgb_f32(); - - rgb = rgb.gamma(transfer_function); - let new_rgb = rgb.to_u8(); - - unsafe { - *dst_slice.get_unchecked_mut(px) = new_rgb.r; - *dst_slice.get_unchecked_mut(px + 1) = new_rgb.g; - *dst_slice.get_unchecked_mut(px + 2) = new_rgb.b; + #[cfg(not(feature = "rayon"))] + { + let mut src_offset = 0usize; + let mut dst_offset = 0usize; + + for _ in 0.._height as usize { + let mut _cx = 0usize; + + if let Some(dispatcher) = _wide_row_handler { + unsafe { + _cx = dispatcher( + _cx, + src.as_ptr(), + src_offset, + width, + dst.as_mut_ptr(), + dst_offset, + transfer_function, + ); + } } - if USE_ALPHA && image_configuration.has_alpha() { - let a = unsafe { - *src_slice.get_unchecked(px + image_configuration.get_a_channel_offset()) + let src_ptr = unsafe { 
src.as_ptr().add(src_offset) }; + let dst_ptr = unsafe { dst.as_mut_ptr().add(dst_offset) }; + + let src_slice = unsafe { slice::from_raw_parts(src_ptr, width as usize * channels) }; + let dst_slice = + unsafe { slice::from_raw_parts_mut(dst_ptr, width as usize * channels) }; + + for x in _cx..width as usize { + let px = x * channels; + let r = unsafe { + *src_slice.get_unchecked(px + image_configuration.get_r_channel_offset()) + }; + let g = unsafe { + *src_slice.get_unchecked(px + image_configuration.get_g_channel_offset()) }; + let b = unsafe { + *src_slice.get_unchecked(px + image_configuration.get_b_channel_offset()) + }; + + let rgb = Rgb::::new(r, g, b); + let mut rgb = rgb.to_rgb_f32(); + + rgb = rgb.gamma(transfer_function); + let new_rgb = rgb.to_u8(); + unsafe { - *dst_slice.get_unchecked_mut(px + 3) = a; + *dst_slice.get_unchecked_mut(px) = new_rgb.r; + *dst_slice.get_unchecked_mut(px + 1) = new_rgb.g; + *dst_slice.get_unchecked_mut(px + 2) = new_rgb.b; + } + + if USE_ALPHA && image_configuration.has_alpha() { + let a = unsafe { + *src_slice.get_unchecked(px + image_configuration.get_a_channel_offset()) + }; + unsafe { + *dst_slice.get_unchecked_mut(px + 3) = a; + } } } - } - src_offset += src_stride as usize; - dst_offset += dst_stride as usize; + src_offset += src_stride as usize; + dst_offset += dst_stride as usize; + } } } diff --git a/src/linear_to_planar.rs b/src/linear_to_planar.rs index 41bafa3..90ebe1d 100644 --- a/src/linear_to_planar.rs +++ b/src/linear_to_planar.rs @@ -10,6 +10,12 @@ use crate::neon::linear_to_planar::neon_linear_plane_to_gamma; #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] use crate::sse::sse_linear_plane_to_gamma; use crate::TransferFunction; +#[cfg(feature = "rayon")] +use rayon::iter::{IndexedParallelIterator, ParallelIterator}; +#[cfg(feature = "rayon")] +use rayon::prelude::{ParallelSlice, ParallelSliceMut}; +#[cfg(feature = "rayon")] +use std::slice; #[allow(clippy::type_complexity)] fn linear_to_gamma_channels( @@ -21,9 +27,6 @@ fn linear_to_gamma_channels( height: u32, transfer_function: TransferFunction, ) { - let mut src_offset = 0usize; - let mut dst_offset = 0usize; - let mut _wide_row_handler: Option< unsafe fn(usize, *const f32, u32, *mut u8, u32, u32, TransferFunction) -> usize, > = None; @@ -38,42 +41,90 @@ fn linear_to_gamma_channels( _wide_row_handler = Some(sse_linear_plane_to_gamma); } - for _ in 0..height as usize { - let mut _cx = 0usize; - - if let Some(dispatcher) = _wide_row_handler { - unsafe { - _cx = dispatcher( - _cx, - src.as_ptr(), - src_offset as u32, - dst.as_mut_ptr(), - dst_offset as u32, - width, - transfer_function, - ); + #[cfg(feature = "rayon")] + { + let src_slice_safe_align = unsafe { + slice::from_raw_parts( + src.as_ptr() as *const u8, + src_stride as usize * height as usize, + ) + }; + dst.par_chunks_exact_mut(dst_stride as usize) + .zip(src_slice_safe_align.par_chunks_exact(src_stride as usize)) + .for_each(|(dst, src)| unsafe { + let mut _cx = 0usize; + + if let Some(dispatcher) = _wide_row_handler { + _cx = dispatcher( + _cx, + src.as_ptr() as *const f32, + 0, + dst.as_mut_ptr(), + 0, + width, + transfer_function, + ); + } + + let src_ptr = src.as_ptr() as *const f32; + let dst_ptr = dst.as_mut_ptr(); + + for x in _cx..width as usize { + let px = x; + let src_slice = src_ptr.add(px); + let pixel = src_slice.read_unaligned().min(1f32).max(0f32); + + let dst = dst_ptr.add(px); + let transferred = transfer_function.gamma(pixel); + let rgb8 = (transferred * 255f32).min(255f32).max(0f32) as u8; 
+ + dst.write_unaligned(rgb8); + } + }); + } + + #[cfg(not(feature = "rayon"))] + { + let mut src_offset = 0usize; + let mut dst_offset = 0usize; + + for _ in 0..height as usize { + let mut _cx = 0usize; + + if let Some(dispatcher) = _wide_row_handler { + unsafe { + _cx = dispatcher( + _cx, + src.as_ptr(), + src_offset as u32, + dst.as_mut_ptr(), + dst_offset as u32, + width, + transfer_function, + ); + } } - } - let src_ptr = unsafe { (src.as_ptr() as *const u8).add(src_offset) as *const f32 }; - let dst_ptr = unsafe { dst.as_mut_ptr().add(dst_offset) }; + let src_ptr = unsafe { (src.as_ptr() as *const u8).add(src_offset) as *const f32 }; + let dst_ptr = unsafe { dst.as_mut_ptr().add(dst_offset) }; - for x in _cx..width as usize { - let px = x; - let src_slice = unsafe { src_ptr.add(px) }; - let pixel = unsafe { src_slice.read_unaligned() }.min(1f32).max(0f32); + for x in _cx..width as usize { + let px = x; + let src_slice = unsafe { src_ptr.add(px) }; + let pixel = unsafe { src_slice.read_unaligned() }.min(1f32).max(0f32); - let dst = unsafe { dst_ptr.add(px) }; - let transferred = transfer_function.gamma(pixel); - let rgb8 = (transferred * 255f32).min(255f32).max(0f32) as u8; + let dst = unsafe { dst_ptr.add(px) }; + let transferred = transfer_function.gamma(pixel); + let rgb8 = (transferred * 255f32).min(255f32).max(0f32) as u8; - unsafe { - dst.write_unaligned(rgb8); + unsafe { + dst.write_unaligned(rgb8); + } } - } - src_offset += src_stride as usize; - dst_offset += dst_stride as usize; + src_offset += src_stride as usize; + dst_offset += dst_stride as usize; + } } } diff --git a/src/neon/gamma_curves.rs b/src/neon/gamma_curves.rs index 44500d8..f5b7833 100644 --- a/src/neon/gamma_curves.rs +++ b/src/neon/gamma_curves.rs @@ -94,6 +94,24 @@ pub unsafe fn neon_pure_gamma_function(gamma: float32x4_t, gamma_constant: f32) vbslq_f32(ones_mask, ones, rs) } +#[inline(always)] +pub unsafe fn neon_smpte428_to_linear(gamma: float32x4_t) -> float32x4_t { + const SCALE: f32 = 1. 
/ 0.91655527974030934f32; + vmulq_n_f32( + vpowq_n_f32(vmaxq_f32(gamma, vdupq_n_f32(0.)), 2.6f32), + SCALE, + ) +} + +#[inline(always)] +pub unsafe fn neon_smpte428_from_linear(linear: float32x4_t) -> float32x4_t { + const POWER_VALUE: f32 = 1.0f32 / 2.6f32; + vpowq_n_f32( + vmulq_n_f32(vmaxq_f32(linear, vdupq_n_f32(0.)), 0.91655527974030934f32), + POWER_VALUE, + ) +} + #[inline(always)] pub unsafe fn neon_gamma2p2_to_linear(gamma: float32x4_t) -> float32x4_t { neon_pure_gamma_function(gamma, 2.2f32) @@ -114,18 +132,6 @@ pub unsafe fn neon_gamma2p8_from_linear(linear: float32x4_t) -> float32x4_t { neon_pure_gamma_function(linear, 1f32 / 2.8f32) } -#[inline(always)] -pub unsafe fn get_neon_linear_transfer( - transfer_function: TransferFunction, -) -> unsafe fn(float32x4_t) -> float32x4_t { - match transfer_function { - TransferFunction::Srgb => neon_srgb_to_linear, - TransferFunction::Rec709 => neon_rec709_to_linear, - TransferFunction::Gamma2p2 => neon_gamma2p2_to_linear, - TransferFunction::Gamma2p8 => neon_gamma2p8_to_linear, - } -} - #[inline(always)] pub unsafe fn neon_perform_linear_transfer( transfer_function: TransferFunction, @@ -136,18 +142,7 @@ pub unsafe fn neon_perform_linear_transfer( TransferFunction::Rec709 => neon_rec709_to_linear(v), TransferFunction::Gamma2p2 => neon_gamma2p2_to_linear(v), TransferFunction::Gamma2p8 => neon_gamma2p8_to_linear(v), - } -} - -#[inline(always)] -pub unsafe fn get_neon_gamma_transfer( - transfer_function: TransferFunction, -) -> unsafe fn(float32x4_t) -> float32x4_t { - match transfer_function { - TransferFunction::Srgb => neon_srgb_from_linear, - TransferFunction::Rec709 => neon_rec709_from_linear, - TransferFunction::Gamma2p2 => neon_gamma2p2_from_linear, - TransferFunction::Gamma2p8 => neon_gamma2p8_from_linear, + TransferFunction::Smpte428 => neon_smpte428_to_linear(v), } } @@ -161,5 +156,6 @@ pub unsafe fn neon_perform_gamma_transfer( TransferFunction::Rec709 => neon_rec709_from_linear(v), TransferFunction::Gamma2p2 => neon_gamma2p2_from_linear(v), TransferFunction::Gamma2p8 => neon_gamma2p8_from_linear(v), + TransferFunction::Smpte428 => neon_smpte428_from_linear(v), } } diff --git a/src/neon/linear_to_planar.rs b/src/neon/linear_to_planar.rs index dc4772b..cf09d8e 100644 --- a/src/neon/linear_to_planar.rs +++ b/src/neon/linear_to_planar.rs @@ -5,28 +5,24 @@ * // license that can be found in the LICENSE file. 
*/ -use std::arch::aarch64::*; - -use crate::neon::get_neon_gamma_transfer; +use crate::neon::neon_perform_gamma_transfer; use crate::TransferFunction; +use std::arch::aarch64::*; #[inline(always)] -unsafe fn transfer_to_gamma( - r: float32x4_t, - transfer: &unsafe fn(float32x4_t) -> float32x4_t, -) -> uint32x4_t { - vcvtaq_u32_f32(vmulq_n_f32(transfer(r), 255f32)) +unsafe fn transfer_to_gamma(r: float32x4_t, transfer_function: TransferFunction) -> uint32x4_t { + vcvtaq_u32_f32(vmulq_n_f32( + neon_perform_gamma_transfer(transfer_function, r), + 255f32, + )) } #[inline(always)] -unsafe fn process_set( - k: float32x4x4_t, - function: &unsafe fn(float32x4_t) -> float32x4_t, -) -> uint8x16_t { - let y0 = transfer_to_gamma(k.0, function); - let y1 = transfer_to_gamma(k.1, function); - let y2 = transfer_to_gamma(k.2, function); - let y3 = transfer_to_gamma(k.3, function); +unsafe fn process_set(k: float32x4x4_t, transfer_function: TransferFunction) -> uint8x16_t { + let y0 = transfer_to_gamma(k.0, transfer_function); + let y1 = transfer_to_gamma(k.1, transfer_function); + let y2 = transfer_to_gamma(k.2, transfer_function); + let y3 = transfer_to_gamma(k.3, transfer_function); let y_row01 = vcombine_u16(vqmovn_u32(y0), vqmovn_u32(y1)); let y_row23 = vcombine_u16(vqmovn_u32(y2), vqmovn_u32(y3)); @@ -46,8 +42,6 @@ pub unsafe fn neon_linear_plane_to_gamma( ) -> usize { let mut cx = start_cx; - let function = get_neon_gamma_transfer(transfer_function); - while cx + 64 < width as usize { let offset_src_ptr = ((src as *const u8).add(src_offset as usize) as *const f32).add(cx); @@ -56,10 +50,10 @@ pub unsafe fn neon_linear_plane_to_gamma( let pixel_row2 = vld1q_f32_x4(offset_src_ptr.add(32)); let pixel_row3 = vld1q_f32_x4(offset_src_ptr.add(48)); - let set0 = process_set(pixel_row0, &function); - let set1 = process_set(pixel_row1, &function); - let set2 = process_set(pixel_row2, &function); - let set3 = process_set(pixel_row3, &function); + let set0 = process_set(pixel_row0, transfer_function); + let set1 = process_set(pixel_row1, transfer_function); + let set2 = process_set(pixel_row2, transfer_function); + let set3 = process_set(pixel_row3, transfer_function); let dst_ptr = dst.add(dst_offset as usize + cx); @@ -73,7 +67,7 @@ pub unsafe fn neon_linear_plane_to_gamma( let offset_src_ptr = ((src as *const u8).add(src_offset as usize) as *const f32).add(cx); let pixel_row = vld1q_f32_x4(offset_src_ptr); - let r_row = process_set(pixel_row, &function); + let r_row = process_set(pixel_row, transfer_function); let dst_ptr = dst.add(dst_offset as usize + cx); vst1q_u8(dst_ptr, r_row); diff --git a/src/neon/planar_to_linear.rs b/src/neon/planar_to_linear.rs index b8d5771..490fdc3 100644 --- a/src/neon/planar_to_linear.rs +++ b/src/neon/planar_to_linear.rs @@ -10,38 +10,32 @@ use crate::neon::*; use std::arch::aarch64::*; #[inline(always)] -unsafe fn neon_to_linear( - r: uint32x4_t, - transfer: &unsafe fn(float32x4_t) -> float32x4_t, -) -> float32x4_t { +unsafe fn neon_to_linear(r: uint32x4_t, transfer_function: TransferFunction) -> float32x4_t { let r_f = vmulq_n_f32(vcvtq_f32_u32(r), 1f32 / 255f32); - transfer(r_f) + neon_perform_linear_transfer(transfer_function, r_f) } #[inline] -unsafe fn process_pixels( - pixels: uint8x16_t, - transfer: &unsafe fn(float32x4_t) -> float32x4_t, -) -> float32x4x4_t { +unsafe fn process_pixels(pixels: uint8x16_t, transfer_function: TransferFunction) -> float32x4x4_t { let r_low = vmovl_u8(vget_low_u8(pixels)); let r_low_low = vmovl_u16(vget_low_u16(r_low)); - let x_low_low = 
neon_to_linear(r_low_low, transfer); + let x_low_low = neon_to_linear(r_low_low, transfer_function); let r_low_high = vmovl_high_u16(r_low); - let x_low_high = neon_to_linear(r_low_high, transfer); + let x_low_high = neon_to_linear(r_low_high, transfer_function); let r_high = vmovl_high_u8(pixels); let r_high_low = vmovl_u16(vget_low_u16(r_high)); - let x_high_low = neon_to_linear(r_high_low, transfer); + let x_high_low = neon_to_linear(r_high_low, transfer_function); let r_high_high = vmovl_high_u16(r_high); - let x_high_high = neon_to_linear(r_high_high, transfer); + let x_high_high = neon_to_linear(r_high_high, transfer_function); float32x4x4_t(x_low_low, x_low_high, x_high_low, x_high_high) } @@ -56,23 +50,22 @@ pub unsafe fn neon_plane_to_linear( transfer_function: TransferFunction, ) -> usize { let mut cx = start_cx; - let transfer = get_neon_linear_transfer(transfer_function); let dst_ptr = (dst as *mut u8).add(dst_offset) as *mut f32; while cx + 64 < width as usize { let src_ptr = src.add(src_offset + cx); let pixels_row64 = vld1q_u8_x4(src_ptr); - let storing_row0 = process_pixels(pixels_row64.0, &transfer); + let storing_row0 = process_pixels(pixels_row64.0, transfer_function); vst1q_f32_x4(dst_ptr.add(cx), storing_row0); - let storing_row1 = process_pixels(pixels_row64.1, &transfer); + let storing_row1 = process_pixels(pixels_row64.1, transfer_function); vst1q_f32_x4(dst_ptr.add(cx + 16), storing_row1); - let storing_row2 = process_pixels(pixels_row64.2, &transfer); + let storing_row2 = process_pixels(pixels_row64.2, transfer_function); vst1q_f32_x4(dst_ptr.add(cx + 32), storing_row2); - let storing_row3 = process_pixels(pixels_row64.3, &transfer); + let storing_row3 = process_pixels(pixels_row64.3, transfer_function); vst1q_f32_x4(dst_ptr.add(cx + 48), storing_row3); cx += 64; @@ -81,7 +74,7 @@ pub unsafe fn neon_plane_to_linear( while cx + 16 < width as usize { let src_ptr = src.add(src_offset + cx); let pixels = vld1q_u8(src_ptr); - let storing_row = process_pixels(pixels, &transfer); + let storing_row = process_pixels(pixels, transfer_function); vst1q_f32_x4(dst_ptr.add(cx), storing_row); cx += 16; diff --git a/src/neon/to_linear_u8.rs b/src/neon/to_linear_u8.rs index 3ed2ab3..bdd15a3 100644 --- a/src/neon/to_linear_u8.rs +++ b/src/neon/to_linear_u8.rs @@ -6,10 +6,7 @@ */ use crate::image::ImageConfiguration; -use crate::neon::{ - neon_perform_gamma_transfer, - neon_perform_linear_transfer, -}; +use crate::neon::{neon_perform_gamma_transfer, neon_perform_linear_transfer}; use crate::{ load_u8_and_deinterleave, load_u8_and_deinterleave_half, load_u8_and_deinterleave_quarter, TransferFunction, @@ -88,15 +85,23 @@ pub unsafe fn neon_channels_to_linear_u8< let g_low_low = vmovl_u16(vget_low_u16(g_low)); let b_low_low = vmovl_u16(vget_low_u16(b_low)); - let (x_low_low, y_low_low, z_low_low) = - neon_triple_to_linear_u8::(r_low_low, g_low_low, b_low_low, transfer_function); + let (x_low_low, y_low_low, z_low_low) = neon_triple_to_linear_u8::( + r_low_low, + g_low_low, + b_low_low, + transfer_function, + ); let r_low_high = vmovl_high_u16(r_low); let g_low_high = vmovl_high_u16(g_low); let b_low_high = vmovl_high_u16(b_low); - let (x_low_high, y_low_high, z_low_high) = - neon_triple_to_linear_u8::(r_low_high, g_low_high, b_low_high, transfer_function); + let (x_low_high, y_low_high, z_low_high) = neon_triple_to_linear_u8::( + r_low_high, + g_low_high, + b_low_high, + transfer_function, + ); let r_high = vmovl_high_u8(r_chan); let g_high = vmovl_high_u8(g_chan); @@ -106,15 +111,23 
@@ pub unsafe fn neon_channels_to_linear_u8< let g_high_low = vmovl_u16(vget_low_u16(g_high)); let b_high_low = vmovl_u16(vget_low_u16(b_high)); - let (x_high_low, y_high_low, z_high_low) = - neon_triple_to_linear_u8::(r_high_low, g_high_low, b_high_low, transfer_function); + let (x_high_low, y_high_low, z_high_low) = neon_triple_to_linear_u8::( + r_high_low, + g_high_low, + b_high_low, + transfer_function, + ); let r_high_high = vmovl_high_u16(r_high); let g_high_high = vmovl_high_u16(g_high); let b_high_high = vmovl_high_u16(b_high); - let (x_high_high, y_high_high, z_high_high) = - neon_triple_to_linear_u8::(r_high_high, g_high_high, b_high_high, transfer_function); + let (x_high_high, y_high_high, z_high_high) = neon_triple_to_linear_u8::( + r_high_high, + g_high_high, + b_high_high, + transfer_function, + ); let r_u_norm = vcombine_u8( vqmovn_u16(vcombine_u16(vmovn_u32(x_low_low), vmovn_u32(x_low_high))), @@ -156,15 +169,23 @@ pub unsafe fn neon_channels_to_linear_u8< let g_low_low = vmovl_u16(vget_low_u16(g_low)); let b_low_low = vmovl_u16(vget_low_u16(b_low)); - let (x_low_low, y_low_low, z_low_low) = - neon_triple_to_linear_u8::(r_low_low, g_low_low, b_low_low, transfer_function); + let (x_low_low, y_low_low, z_low_low) = neon_triple_to_linear_u8::( + r_low_low, + g_low_low, + b_low_low, + transfer_function, + ); let r_low_high = vmovl_high_u16(r_low); let g_low_high = vmovl_high_u16(g_low); let b_low_high = vmovl_high_u16(b_low); - let (x_low_high, y_low_high, z_low_high) = - neon_triple_to_linear_u8::(r_low_high, g_low_high, b_low_high, transfer_function); + let (x_low_high, y_low_high, z_low_high) = neon_triple_to_linear_u8::( + r_low_high, + g_low_high, + b_low_high, + transfer_function, + ); let r_u_norm = vqmovn_u16(vcombine_u16(vmovn_u32(x_low_low), vmovn_u32(x_low_high))); @@ -199,8 +220,12 @@ pub unsafe fn neon_channels_to_linear_u8< let g_low_low = vmovl_u16(vget_low_u16(g_low)); let b_low_low = vmovl_u16(vget_low_u16(b_low)); - let (x_low_low, y_low_low, z_low_low) = - neon_triple_to_linear_u8::(r_low_low, g_low_low, b_low_low, transfer_function); + let (x_low_low, y_low_low, z_low_low) = neon_triple_to_linear_u8::( + r_low_low, + g_low_low, + b_low_low, + transfer_function, + ); let zeros = vdup_n_u16(0); diff --git a/src/oklab_to_image.rs b/src/oklab_to_image.rs index e6f09a3..157075e 100644 --- a/src/oklab_to_image.rs +++ b/src/oklab_to_image.rs @@ -14,6 +14,12 @@ use crate::oklch::Oklch; #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] use crate::sse::sse_oklab_to_image; use crate::{Oklab, TransferFunction}; +#[cfg(feature = "rayon")] +use rayon::iter::{IndexedParallelIterator, ParallelIterator}; +#[cfg(feature = "rayon")] +use rayon::prelude::{ParallelSlice, ParallelSliceMut}; +#[cfg(feature = "rayon")] +use std::slice; #[allow(clippy::type_complexity)] fn oklab_to_image( @@ -28,9 +34,6 @@ fn oklab_to_image( let target: OklabTarget = TARGET.into(); let image_configuration: ImageConfiguration = CHANNELS_CONFIGURATION.into(); - let mut src_offset = 0usize; - let mut dst_offset = 0usize; - let mut _wide_row_handle: Option< unsafe fn(usize, *const f32, u32, *mut u8, u32, u32, TransferFunction) -> usize, > = None; @@ -47,72 +50,137 @@ fn oklab_to_image( #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] { - _wide_row_handle = Some( - neon_oklab_to_image::< - CHANNELS_CONFIGURATION, - TARGET, - >, - ); + _wide_row_handle = Some(neon_oklab_to_image::); } - let channels = image_configuration.get_channels_count(); + #[cfg(feature = "rayon")] + { + let 
src_slice_safe_align = unsafe { + slice::from_raw_parts( + src.as_ptr() as *const u8, + src_stride as usize * height as usize, + ) + }; + dst.par_chunks_exact_mut(dst_stride as usize) + .zip(src_slice_safe_align.par_chunks_exact(src_stride as usize)) + .for_each(|(dst, src)| unsafe { + let channels = image_configuration.get_channels_count(); - for _ in 0..height as usize { - let mut _cx = 0usize; + let mut _cx = 0usize; - let src_ptr = unsafe { (src.as_ptr() as *const u8).add(src_offset) as *mut f32 }; - let dst_ptr = unsafe { dst.as_mut_ptr().add(dst_offset) }; + let src_ptr = src.as_ptr() as *mut f32; + let dst_ptr = dst.as_mut_ptr(); - if let Some(dispatcher) = _wide_row_handle { - unsafe { - _cx = dispatcher( - _cx, - src.as_ptr(), - src_offset as u32, - dst.as_mut_ptr(), - dst_offset as u32, - width, - transfer_function, - ) - } - } + if let Some(dispatcher) = _wide_row_handle { + _cx = dispatcher( + _cx, + src.as_ptr() as *const f32, + 0, + dst.as_mut_ptr(), + 0, + width, + transfer_function, + ) + } - for x in _cx..width as usize { - let px = x * channels; - let source_p = unsafe { src_ptr.add(px) }; - let l_x = unsafe { source_p.read_unaligned() }; - let l_y = unsafe { source_p.add(1).read_unaligned() }; - let l_z = unsafe { source_p.add(2).read_unaligned() }; - let rgb = match target { - OklabTarget::Oklab => { - let oklab = Oklab::new(l_x, l_y, l_z); - oklab.to_rgb(transfer_function) + for x in _cx..width as usize { + let px = x * channels; + let source_p = src_ptr.add(px); + let l_x = source_p.read_unaligned(); + let l_y = source_p.add(1).read_unaligned(); + let l_z = source_p.add(2).read_unaligned(); + let rgb = match target { + OklabTarget::Oklab => { + let oklab = Oklab::new(l_x, l_y, l_z); + oklab.to_rgb(transfer_function) + } + OklabTarget::Oklch => { + let oklch = Oklch::new(l_x, l_y, l_z); + oklch.to_rgb(transfer_function) + } + }; + + let dst = dst_ptr.add(x * channels); + dst.add(image_configuration.get_r_channel_offset()) + .write_unaligned(rgb.r); + dst.add(image_configuration.get_g_channel_offset()) + .write_unaligned(rgb.g); + dst.add(image_configuration.get_b_channel_offset()) + .write_unaligned(rgb.b); + if image_configuration.has_alpha() { + let l_a = source_p.add(3).read_unaligned(); + let a_value = (l_a * 255f32).max(0f32); + dst.add(image_configuration.get_a_channel_offset()) + .write_unaligned(a_value as u8); + } } - OklabTarget::Oklch => { - let oklch = Oklch::new(l_x, l_y, l_z); - oklch.to_rgb(transfer_function) + }); + } + + #[cfg(not(feature = "rayon"))] + { + let mut src_offset = 0usize; + let mut dst_offset = 0usize; + + let channels = image_configuration.get_channels_count(); + + for _ in 0..height as usize { + let mut _cx = 0usize; + + let src_ptr = unsafe { (src.as_ptr() as *const u8).add(src_offset) as *mut f32 }; + let dst_ptr = unsafe { dst.as_mut_ptr().add(dst_offset) }; + + if let Some(dispatcher) = _wide_row_handle { + unsafe { + _cx = dispatcher( + _cx, + src.as_ptr(), + src_offset as u32, + dst.as_mut_ptr(), + dst_offset as u32, + width, + transfer_function, + ) } - }; + } + + for x in _cx..width as usize { + let px = x * channels; + let source_p = unsafe { src_ptr.add(px) }; + let l_x = unsafe { source_p.read_unaligned() }; + let l_y = unsafe { source_p.add(1).read_unaligned() }; + let l_z = unsafe { source_p.add(2).read_unaligned() }; + let rgb = match target { + OklabTarget::Oklab => { + let oklab = Oklab::new(l_x, l_y, l_z); + oklab.to_rgb(transfer_function) + } + OklabTarget::Oklch => { + let oklch = Oklch::new(l_x, l_y, l_z); + 
oklch.to_rgb(transfer_function) + } + }; - unsafe { - let dst = dst_ptr.add(x * channels); - dst.add(image_configuration.get_r_channel_offset()) - .write_unaligned(rgb.r); - dst.add(image_configuration.get_g_channel_offset()) - .write_unaligned(rgb.g); - dst.add(image_configuration.get_b_channel_offset()) - .write_unaligned(rgb.b); - if image_configuration.has_alpha() { - let l_a = source_p.add(3).read_unaligned(); - let a_value = (l_a * 255f32).max(0f32); - dst.add(image_configuration.get_a_channel_offset()) - .write_unaligned(a_value as u8); + unsafe { + let dst = dst_ptr.add(x * channels); + dst.add(image_configuration.get_r_channel_offset()) + .write_unaligned(rgb.r); + dst.add(image_configuration.get_g_channel_offset()) + .write_unaligned(rgb.g); + dst.add(image_configuration.get_b_channel_offset()) + .write_unaligned(rgb.b); + if image_configuration.has_alpha() { + let l_a = source_p.add(3).read_unaligned(); + let a_value = (l_a * 255f32).max(0f32); + dst.add(image_configuration.get_a_channel_offset()) + .write_unaligned(a_value as u8); + } } } - } - src_offset += src_stride as usize; - dst_offset += dst_stride as usize; + src_offset += src_stride as usize; + dst_offset += dst_stride as usize; + } } } diff --git a/src/planar_to_linear.rs b/src/planar_to_linear.rs index 965cfc3..fe888e0 100644 --- a/src/planar_to_linear.rs +++ b/src/planar_to_linear.rs @@ -10,6 +10,12 @@ use crate::neon::planar_to_linear::neon_plane_to_linear; #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] use crate::sse::sse_plane_to_linear; use crate::TransferFunction; +#[cfg(feature = "rayon")] +use rayon::iter::{IndexedParallelIterator, ParallelIterator}; +#[cfg(feature = "rayon")] +use rayon::prelude::{ParallelSlice, ParallelSliceMut}; +#[cfg(feature = "rayon")] +use std::slice; #[inline(always)] #[allow(clippy::type_complexity)] @@ -22,9 +28,6 @@ fn channels_to_linear( height: u32, transfer_function: TransferFunction, ) { - let mut src_offset = 0usize; - let mut dst_offset = 0usize; - let mut _wide_row_handler: Option< unsafe fn(usize, *const u8, usize, u32, *mut f32, usize, TransferFunction) -> usize, > = None; @@ -39,40 +42,79 @@ fn channels_to_linear( _wide_row_handler = Some(neon_plane_to_linear); } - for _ in 0..height as usize { - let mut _cx = 0usize; - - let src_ptr = unsafe { src.as_ptr().add(src_offset) }; - let dst_ptr = unsafe { (dst.as_mut_ptr() as *mut u8).add(dst_offset) as *mut f32 }; - - if let Some(dispatcher) = _wide_row_handler { - unsafe { - _cx = dispatcher( - _cx, - src.as_ptr(), - src_offset, - width, - dst.as_mut_ptr(), - dst_offset, - transfer_function, - ); + #[cfg(feature = "rayon")] + { + let dst_slice_safe_align = unsafe { + slice::from_raw_parts_mut( + dst.as_mut_ptr() as *mut u8, + dst_stride as usize * height as usize, + ) + }; + dst_slice_safe_align + .par_chunks_exact_mut(dst_stride as usize) + .zip(src.par_chunks_exact(src_stride as usize)) + .for_each(|(dst, src)| unsafe { + let mut _cx = 0usize; + + let src_ptr = src.as_ptr(); + let dst_ptr = dst.as_mut_ptr() as *mut f32; + + if let Some(dispatcher) = _wide_row_handler { + _cx = dispatcher(_cx, src_ptr, 0, width, dst_ptr, 0, transfer_function); + } + + for x in _cx..width as usize { + let px = x; + let dst = dst_ptr.add(px); + let src = src_ptr.add(px); + let pixel_f = src.read_unaligned() as f32 * (1. 
/ 255.); + let transferred = transfer_function.linearize(pixel_f); + + dst.write_unaligned(transferred); + } + }); + } + + #[cfg(not(feature = "rayon"))] + { + let mut src_offset = 0usize; + let mut dst_offset = 0usize; + + for _ in 0..height as usize { + let mut _cx = 0usize; + + let src_ptr = unsafe { src.as_ptr().add(src_offset) }; + let dst_ptr = unsafe { (dst.as_mut_ptr() as *mut u8).add(dst_offset) as *mut f32 }; + + if let Some(dispatcher) = _wide_row_handler { + unsafe { + _cx = dispatcher( + _cx, + src.as_ptr(), + src_offset, + width, + dst.as_mut_ptr(), + dst_offset, + transfer_function, + ); + } } - } - for x in _cx..width as usize { - let px = x; - let dst = unsafe { dst_ptr.add(px) }; - let src = unsafe { src_ptr.add(px) }; - let pixel_f = unsafe { src.read_unaligned() as f32 } * (1. / 255.); - let transferred = transfer_function.linearize(pixel_f); + for x in _cx..width as usize { + let px = x; + let dst = unsafe { dst_ptr.add(px) }; + let src = unsafe { src_ptr.add(px) }; + let pixel_f = unsafe { src.read_unaligned() as f32 } * (1. / 255.); + let transferred = transfer_function.linearize(pixel_f); - unsafe { - dst.write_unaligned(transferred); + unsafe { + dst.write_unaligned(transferred); + } } - } - src_offset += src_stride as usize; - dst_offset += dst_stride as usize; + src_offset += src_stride as usize; + dst_offset += dst_stride as usize; + } } } diff --git a/src/sigmoidal_to_image.rs b/src/sigmoidal_to_image.rs index 1b3b6863..7ef23b1 100644 --- a/src/sigmoidal_to_image.rs +++ b/src/sigmoidal_to_image.rs @@ -13,6 +13,12 @@ use crate::neon::neon_from_sigmoidal_row; #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] use crate::sse::sse_from_sigmoidal_row; use crate::{Rgb, Sigmoidal}; +#[cfg(feature = "rayon")] +use rayon::iter::{IndexedParallelIterator, ParallelIterator}; +#[cfg(feature = "rayon")] +use rayon::prelude::{ParallelSlice, ParallelSliceMut}; +#[cfg(feature = "rayon")] +use std::slice; #[allow(clippy::type_complexity)] fn sigmoidal_to_image( @@ -28,9 +34,6 @@ fn sigmoidal_to_image( panic!("Alpha may be set only on images with alpha"); } - let mut src_offset = 0usize; - let mut dst_offset = 0usize; - let channels = image_configuration.get_channels_count(); let mut _wide_row_handler: Option usize> = None; @@ -50,53 +53,112 @@ fn sigmoidal_to_image( _wide_row_handler = Some(neon_from_sigmoidal_row::); } - for _ in 0..height as usize { - let mut _cx = 0usize; + #[cfg(feature = "rayon")] + { + let src_slice_safe_align = unsafe { + slice::from_raw_parts( + src.as_ptr() as *const u8, + src_stride as usize * height as usize, + ) + }; + dst.par_chunks_exact_mut(dst_stride as usize) + .zip(src_slice_safe_align.par_chunks_exact(src_stride as usize)) + .for_each(|(dst, src)| unsafe { + let mut _cx = 0usize; + + let src_ptr = src.as_ptr() as *const f32; + let dst_ptr = dst.as_mut_ptr(); + + if let Some(dispatcher) = _wide_row_handler { + _cx = dispatcher(_cx, src_ptr, dst_ptr, width); + } - let src_ptr = unsafe { (src.as_ptr() as *const u8).add(src_offset) as *const f32 }; - let dst_ptr = unsafe { dst.as_mut_ptr().add(dst_offset) }; + for x in _cx..width as usize { + let px = x * channels; + let reading_ptr = src_ptr.add(px); + let sr = reading_ptr.read_unaligned(); + let sg = reading_ptr.add(1).read_unaligned(); + let sb = reading_ptr.add(2).read_unaligned(); + + let sigmoidal = Sigmoidal::new(sr, sg, sb); + let rgb: Rgb = sigmoidal.into(); + + let hx = x * channels; + + let dst = dst_ptr.add(hx); + + dst.add(image_configuration.get_r_channel_offset()) + 
.write_unaligned(rgb.r); + dst.add(image_configuration.get_g_channel_offset()) + .write_unaligned(rgb.g); + dst.add(image_configuration.get_b_channel_offset()) + .write_unaligned(rgb.b); + + if image_configuration.has_alpha() { + let a = (reading_ptr.add(3).read_unaligned() * 255f32) + .max(0f32) + .round() + .min(255f32); + dst.add(image_configuration.get_a_channel_offset()) + .write_unaligned(a as u8); + } + } + }); + } + + #[cfg(not(feature = "rayon"))] + { + let mut src_offset = 0usize; + let mut dst_offset = 0usize; - if let Some(dispatcher) = _wide_row_handler { - unsafe { - _cx = dispatcher(_cx, src_ptr, dst_ptr, width); + for _ in 0..height as usize { + let mut _cx = 0usize; + + let src_ptr = unsafe { (src.as_ptr() as *const u8).add(src_offset) as *const f32 }; + let dst_ptr = unsafe { dst.as_mut_ptr().add(dst_offset) }; + + if let Some(dispatcher) = _wide_row_handler { + unsafe { + _cx = dispatcher(_cx, src_ptr, dst_ptr, width); + } } - } - for x in _cx..width as usize { - let px = x * channels; - let reading_ptr = unsafe { src_ptr.add(px) }; - let sr = unsafe { reading_ptr.read_unaligned() }; - let sg = unsafe { reading_ptr.add(1).read_unaligned() }; - let sb = unsafe { reading_ptr.add(2).read_unaligned() }; - - let sigmoidal = Sigmoidal::new(sr, sg, sb); - let rgb: Rgb = sigmoidal.into(); - - let hx = x * channels; - - let dst = unsafe { dst_ptr.add(hx) }; - - unsafe { - dst.add(image_configuration.get_r_channel_offset()) - .write_unaligned(rgb.r); - dst.add(image_configuration.get_g_channel_offset()) - .write_unaligned(rgb.g); - dst.add(image_configuration.get_b_channel_offset()) - .write_unaligned(rgb.b); - - if image_configuration.has_alpha() { - let a = (reading_ptr.add(3).read_unaligned() * 255f32) - .max(0f32) - .round() - .min(255f32); - dst.add(image_configuration.get_a_channel_offset()) - .write_unaligned(a as u8); + for x in _cx..width as usize { + let px = x * channels; + let reading_ptr = unsafe { src_ptr.add(px) }; + let sr = unsafe { reading_ptr.read_unaligned() }; + let sg = unsafe { reading_ptr.add(1).read_unaligned() }; + let sb = unsafe { reading_ptr.add(2).read_unaligned() }; + + let sigmoidal = Sigmoidal::new(sr, sg, sb); + let rgb: Rgb = sigmoidal.into(); + + let hx = x * channels; + + let dst = unsafe { dst_ptr.add(hx) }; + + unsafe { + dst.add(image_configuration.get_r_channel_offset()) + .write_unaligned(rgb.r); + dst.add(image_configuration.get_g_channel_offset()) + .write_unaligned(rgb.g); + dst.add(image_configuration.get_b_channel_offset()) + .write_unaligned(rgb.b); + + if image_configuration.has_alpha() { + let a = (reading_ptr.add(3).read_unaligned() * 255f32) + .max(0f32) + .round() + .min(255f32); + dst.add(image_configuration.get_a_channel_offset()) + .write_unaligned(a as u8); + } } } - } - src_offset += src_stride as usize; - dst_offset += dst_stride as usize; + src_offset += src_stride as usize; + dst_offset += dst_stride as usize; + } } } diff --git a/src/sse/image_to_jzazbz.rs b/src/sse/image_to_jzazbz.rs index 017f9e3..053e896 100644 --- a/src/sse/image_to_jzazbz.rs +++ b/src/sse/image_to_jzazbz.rs @@ -14,7 +14,10 @@ use erydanos::{_mm_atan2_ps, _mm_hypot_fast_ps, _mm_isnan_ps, _mm_mlaf_ps, _mm_p use crate::image::ImageConfiguration; use crate::image_to_jzazbz::JzazbzTarget; -use crate::sse::{_mm_color_matrix_ps, _mm_pow_n_ps, _mm_select_ps, perform_sse_linear_transfer, sse_deinterleave_rgb, sse_deinterleave_rgba, sse_interleave_ps_rgb, sse_interleave_ps_rgba}; +use crate::sse::{ + _mm_color_matrix_ps, _mm_pow_n_ps, _mm_select_ps, 
perform_sse_linear_transfer, + sse_deinterleave_rgb, sse_deinterleave_rgba, sse_interleave_ps_rgb, sse_interleave_ps_rgba, +}; use crate::{ load_u8_and_deinterleave, load_u8_and_deinterleave_half, store_and_interleave_v3_direct_f32, store_and_interleave_v4_direct_f32, TransferFunction, SRGB_TO_XYZ_D65, @@ -151,8 +154,14 @@ pub unsafe fn sse_image_to_jzazbz(g_low)); let b_low_high = _mm_cvtepu16_epi32(_mm_srli_si128::<8>(b_low)); - let (x_low_high, y_low_high, z_low_high) = - triple_to_jzazbz!(r_low_high, g_low_high, b_low_high, transfer_function, target, luminance); + let (x_low_high, y_low_high, z_low_high) = triple_to_jzazbz!( + r_low_high, + g_low_high, + b_low_high, + transfer_function, + target, + luminance + ); if image_configuration.has_alpha() { let a_low_high = _mm_mul_ps( @@ -197,8 +212,14 @@ pub unsafe fn sse_image_to_jzazbz(a_chan)); @@ -260,8 +281,14 @@ pub unsafe fn sse_image_to_jzazbz(g_low)); let b_low_high = _mm_cvtepu16_epi32(_mm_srli_si128::<8>(b_low)); - let (x_low_high, y_low_high, z_low_high) = - triple_to_jzazbz!(r_low_high, g_low_high, b_low_high, transfer_function, target, luminance); + let (x_low_high, y_low_high, z_low_high) = triple_to_jzazbz!( + r_low_high, + g_low_high, + b_low_high, + transfer_function, + target, + luminance + ); if image_configuration.has_alpha() { let a_low_high = _mm_mul_ps( diff --git a/src/sse/image_to_linear_u8.rs b/src/sse/image_to_linear_u8.rs index de96a2e..35e73c0 100644 --- a/src/sse/image_to_linear_u8.rs +++ b/src/sse/image_to_linear_u8.rs @@ -94,15 +94,23 @@ pub mod sse_image_to_linear_unsigned { let g_low_low = _mm_cvtepu16_epi32(g_low); let b_low_low = _mm_cvtepu16_epi32(b_low); - let (x_low_low, y_low_low, z_low_low) = - sse_triple_to_linear_u8::(r_low_low, g_low_low, b_low_low, transfer_function); + let (x_low_low, y_low_low, z_low_low) = sse_triple_to_linear_u8::( + r_low_low, + g_low_low, + b_low_low, + transfer_function, + ); let r_low_high = _mm_unpackhi_epi16(r_low, zeros); let g_low_high = _mm_unpackhi_epi16(g_low, zeros); let b_low_high = _mm_unpackhi_epi16(b_low, zeros); - let (x_low_high, y_low_high, z_low_high) = - sse_triple_to_linear_u8::(r_low_high, g_low_high, b_low_high, transfer_function); + let (x_low_high, y_low_high, z_low_high) = sse_triple_to_linear_u8::( + r_low_high, + g_low_high, + b_low_high, + transfer_function, + ); let r_high = _mm_unpackhi_epi8(r_chan, zeros); let g_high = _mm_unpackhi_epi8(g_chan, zeros); @@ -112,15 +120,23 @@ pub mod sse_image_to_linear_unsigned { let g_high_low = _mm_cvtepu16_epi32(g_high); let b_high_low = _mm_cvtepu16_epi32(b_high); - let (x_high_low, y_high_low, z_high_low) = - sse_triple_to_linear_u8::(r_high_low, g_high_low, b_high_low, transfer_function); + let (x_high_low, y_high_low, z_high_low) = sse_triple_to_linear_u8::( + r_high_low, + g_high_low, + b_high_low, + transfer_function, + ); let r_high_high = _mm_unpackhi_epi16(r_high, zeros); let g_high_high = _mm_unpackhi_epi16(g_high, zeros); let b_high_high = _mm_unpackhi_epi16(b_high, zeros); - let (x_high_high, y_high_high, z_high_high) = - sse_triple_to_linear_u8::(r_high_high, g_high_high, b_high_high, transfer_function); + let (x_high_high, y_high_high, z_high_high) = sse_triple_to_linear_u8::( + r_high_high, + g_high_high, + b_high_high, + transfer_function, + ); let r_u_norm = _mm_packus_epi16( _mm_packus_epi32(x_low_low, x_low_high), @@ -168,15 +184,23 @@ pub mod sse_image_to_linear_unsigned { let g_low_low = _mm_cvtepu16_epi32(g_low); let b_low_low = _mm_cvtepu16_epi32(b_low); - let (x_low_low, y_low_low, 
z_low_low) = - sse_triple_to_linear_u8::(r_low_low, g_low_low, b_low_low, transfer_function); + let (x_low_low, y_low_low, z_low_low) = sse_triple_to_linear_u8::( + r_low_low, + g_low_low, + b_low_low, + transfer_function, + ); let r_low_high = _mm_unpackhi_epi16(r_low, zeros); let g_low_high = _mm_unpackhi_epi16(g_low, zeros); let b_low_high = _mm_unpackhi_epi16(b_low, zeros); - let (x_low_high, y_low_high, z_low_high) = - sse_triple_to_linear_u8::(r_low_high, g_low_high, b_low_high, transfer_function); + let (x_low_high, y_low_high, z_low_high) = sse_triple_to_linear_u8::( + r_low_high, + g_low_high, + b_low_high, + transfer_function, + ); let r_u_norm = _mm_packus_epi16(_mm_packus_epi32(x_low_low, x_low_high), zeros); diff --git a/src/sse/image_to_oklab.rs b/src/sse/image_to_oklab.rs index 96afd06..5107986 100644 --- a/src/sse/image_to_oklab.rs +++ b/src/sse/image_to_oklab.rs @@ -14,8 +14,8 @@ use std::arch::x86_64::*; use crate::image::ImageConfiguration; use crate::image_to_oklab::OklabTarget; use crate::sse::{ - _mm_color_matrix_ps, sse_deinterleave_rgb, sse_deinterleave_rgba, - sse_interleave_ps_rgb, sse_interleave_ps_rgba, + _mm_color_matrix_ps, sse_deinterleave_rgb, sse_deinterleave_rgba, sse_interleave_ps_rgb, + sse_interleave_ps_rgba, }; use crate::{ load_u8_and_deinterleave, load_u8_and_deinterleave_half, store_and_interleave_v3_direct_f32, diff --git a/src/sse/xyz_lab_to_image.rs b/src/sse/xyz_lab_to_image.rs index 6a56357..28b9098 100644 --- a/src/sse/xyz_lab_to_image.rs +++ b/src/sse/xyz_lab_to_image.rs @@ -8,8 +8,8 @@ use crate::image::ImageConfiguration; use crate::sse::cie::{sse_lab_to_xyz, sse_lch_to_xyz, sse_luv_to_xyz}; use crate::sse::{ - _mm_color_matrix_ps, perform_sse_gamma_transfer, - sse_deinterleave_rgb_ps, sse_interleave_rgb, sse_interleave_rgba, + _mm_color_matrix_ps, perform_sse_gamma_transfer, sse_deinterleave_rgb_ps, sse_interleave_rgb, + sse_interleave_rgba, }; use crate::xyz_target::XyzTarget; use crate::TransferFunction; diff --git a/src/sse/xyza_laba_to_image.rs b/src/sse/xyza_laba_to_image.rs index 68ae234..fac74d4 100644 --- a/src/sse/xyza_laba_to_image.rs +++ b/src/sse/xyza_laba_to_image.rs @@ -8,8 +8,7 @@ use crate::image::ImageConfiguration; use crate::sse::cie::{sse_lab_to_xyz, sse_lch_to_xyz, sse_luv_to_xyz}; use crate::sse::{ - _mm_color_matrix_ps, perform_sse_gamma_transfer, - sse_deinterleave_rgba_ps, sse_interleave_rgba, + _mm_color_matrix_ps, perform_sse_gamma_transfer, sse_deinterleave_rgba_ps, sse_interleave_rgba, }; use crate::xyz_target::XyzTarget; use crate::TransferFunction; diff --git a/src/xyz.rs b/src/xyz.rs index 8b95b49..cbdca90 100644 --- a/src/xyz.rs +++ b/src/xyz.rs @@ -181,7 +181,12 @@ impl Xyz { let b = x * (*(*matrix.get_unchecked(2)).get_unchecked(0)) + y * (*(*matrix.get_unchecked(2)).get_unchecked(1)) + z * (*(*matrix.get_unchecked(2)).get_unchecked(2)); - Rgb::::new(transfer_function.gamma(r), transfer_function.gamma(g), transfer_function.gamma(b)).to_u8() + Rgb::::new( + transfer_function.gamma(r), + transfer_function.gamma(g), + transfer_function.gamma(b), + ) + .to_u8() } } diff --git a/src/xyz_lab_to_image.rs b/src/xyz_lab_to_image.rs index eb53f22..ba8c885 100644 --- a/src/xyz_lab_to_image.rs +++ b/src/xyz_lab_to_image.rs @@ -15,6 +15,12 @@ use crate::neon::neon_xyz_to_channels; use crate::sse::sse_xyz_to_channels; use crate::xyz_target::XyzTarget; use crate::{LCh, Lab, Luv, Xyz, XYZ_TO_SRGB_D65}; +#[cfg(feature = "rayon")] +use rayon::iter::{IndexedParallelIterator, ParallelIterator}; +#[cfg(feature = "rayon")] 
+use rayon::prelude::{ParallelSlice, ParallelSliceMut}; +#[cfg(feature = "rayon")] +use std::slice; fn xyz_to_channels( src: &[f32], @@ -34,10 +40,6 @@ fn xyz_to_channels); } - for _ in 0..height as usize { - let mut _cx = 0usize; - - if let Some(dispatcher) = _wide_row_handler { - unsafe { - _cx = dispatcher( - _cx, - src.as_ptr(), - src_offset, - a_channel.as_ptr(), - a_offset, - dst.as_mut_ptr(), - dst_offset, - width, - matrix, - transfer_function, - ); - } - } + #[cfg(feature = "rayon")] + { + let src_slice_safe_align = unsafe { + slice::from_raw_parts_mut( + src.as_ptr() as *mut u8, + src_stride as usize * height as usize, + ) + }; - let src_ptr = unsafe { (src.as_ptr() as *const u8).add(src_offset) as *mut f32 }; - let dst_ptr = unsafe { dst.as_mut_ptr().add(dst_offset) }; - - for x in _cx..width as usize { - let src_slice = unsafe { src_ptr.add(x * 3) }; - let l_x = unsafe { src_slice.read_unaligned() }; - let l_y = unsafe { src_slice.add(1).read_unaligned() }; - let l_z = unsafe { src_slice.add(2).read_unaligned() }; - let rgb = match source { - XyzTarget::Lab => { - let lab = Lab::new(l_x, l_y, l_z); - lab.to_rgb() - } - XyzTarget::Xyz => { - let xyz = Xyz::new(l_x, l_y, l_z); - xyz.to_rgb(matrix, transfer_function) - } - XyzTarget::Luv => { - let luv = Luv::new(l_x, l_y, l_z); - luv.to_rgb() - } - XyzTarget::Lch => { - let lch = LCh::new(l_x, l_y, l_z); - lch.to_rgb() - } + if USE_ALPHA { + let a_slice_safe_align = unsafe { + slice::from_raw_parts_mut( + a_channel.as_ptr() as *mut u8, + a_stride as usize * height as usize, + ) }; - let dst = unsafe { dst_ptr.add(x * channels) }; + dst.par_chunks_exact_mut(dst_stride as usize) + .zip(src_slice_safe_align.par_chunks_exact_mut(src_stride as usize)) + .zip(a_slice_safe_align.par_chunks_exact(a_stride as usize)) + .for_each(|((dst, src), a_channel)| unsafe { + let mut _cx = 0usize; + + if let Some(dispatcher) = _wide_row_handler { + _cx = dispatcher( + _cx, + src.as_ptr() as *const f32, + 0, + a_channel.as_ptr() as *const f32, + 0, + dst.as_mut_ptr(), + 0, + width, + matrix, + transfer_function, + ); + } + + let src_ptr = src.as_ptr() as *mut f32; + let dst_ptr = dst.as_mut_ptr(); + + for x in _cx..width as usize { + let src_slice = src_ptr.add(x * 3); + let l_x = src_slice.read_unaligned(); + let l_y = src_slice.add(1).read_unaligned(); + let l_z = src_slice.add(2).read_unaligned(); + let rgb = match source { + XyzTarget::Lab => { + let lab = Lab::new(l_x, l_y, l_z); + lab.to_rgb() + } + XyzTarget::Xyz => { + let xyz = Xyz::new(l_x, l_y, l_z); + xyz.to_rgb(matrix, transfer_function) + } + XyzTarget::Luv => { + let luv = Luv::new(l_x, l_y, l_z); + luv.to_rgb() + } + XyzTarget::Lch => { + let lch = LCh::new(l_x, l_y, l_z); + lch.to_rgb() + } + }; + + let dst = dst_ptr.add(x * channels); + + dst.add(image_configuration.get_r_channel_offset()) + .write_unaligned(rgb.r); + dst.add(image_configuration.get_g_channel_offset()) + .write_unaligned(rgb.g); + dst.add(image_configuration.get_b_channel_offset()) + .write_unaligned(rgb.b); + if image_configuration.has_alpha() { + let a_ptr = a_channel.as_ptr() as *const f32; + let a_f = a_ptr.add(x).read_unaligned(); + let a_value = (a_f * 255f32).max(0f32); + dst.add(image_configuration.get_a_channel_offset()) + .write_unaligned(a_value as u8); + } + } + }); + } else { + dst.par_chunks_exact_mut(dst_stride as usize) + .zip(src_slice_safe_align.par_chunks_exact_mut(src_stride as usize)) + .for_each(|(dst, src)| unsafe { + let mut _cx = 0usize; + + if let Some(dispatcher) = _wide_row_handler { + 
_cx = dispatcher( + _cx, + src.as_ptr() as *const f32, + 0, + std::ptr::null(), + 0, + dst.as_mut_ptr(), + 0, + width, + matrix, + transfer_function, + ); + } + + let src_ptr = src.as_ptr() as *mut f32; + let dst_ptr = dst.as_mut_ptr(); + + for x in _cx..width as usize { + let src_slice = src_ptr.add(x * 3); + let l_x = src_slice.read_unaligned(); + let l_y = src_slice.add(1).read_unaligned(); + let l_z = src_slice.add(2).read_unaligned(); + let rgb = match source { + XyzTarget::Lab => { + let lab = Lab::new(l_x, l_y, l_z); + lab.to_rgb() + } + XyzTarget::Xyz => { + let xyz = Xyz::new(l_x, l_y, l_z); + xyz.to_rgb(matrix, transfer_function) + } + XyzTarget::Luv => { + let luv = Luv::new(l_x, l_y, l_z); + luv.to_rgb() + } + XyzTarget::Lch => { + let lch = LCh::new(l_x, l_y, l_z); + lch.to_rgb() + } + }; - unsafe { - dst.add(image_configuration.get_r_channel_offset()) - .write_unaligned(rgb.r); - dst.add(image_configuration.get_g_channel_offset()) - .write_unaligned(rgb.g); - dst.add(image_configuration.get_b_channel_offset()) - .write_unaligned(rgb.b); + let dst = dst_ptr.add(x * channels); + + dst.add(image_configuration.get_r_channel_offset()) + .write_unaligned(rgb.r); + dst.add(image_configuration.get_g_channel_offset()) + .write_unaligned(rgb.g); + dst.add(image_configuration.get_b_channel_offset()) + .write_unaligned(rgb.b); + } + }); + } + } + + #[cfg(not(feature = "rayon"))] + { + let mut src_offset = 0usize; + let mut dst_offset = 0usize; + let mut a_offset = 0usize; + + for _ in 0..height as usize { + let mut _cx = 0usize; + + if let Some(dispatcher) = _wide_row_handler { + unsafe { + _cx = dispatcher( + _cx, + src.as_ptr(), + src_offset, + a_channel.as_ptr(), + a_offset, + dst.as_mut_ptr(), + dst_offset, + width, + matrix, + transfer_function, + ); + } } - if image_configuration.has_alpha() { - let a_ptr = - unsafe { (a_channel.as_ptr() as *const u8).add(a_offset) as *const f32 }; - let a_f = unsafe { a_ptr.add(x).read_unaligned() }; - let a_value = (a_f * 255f32).max(0f32); + + let src_ptr = unsafe { (src.as_ptr() as *const u8).add(src_offset) as *mut f32 }; + let dst_ptr = unsafe { dst.as_mut_ptr().add(dst_offset) }; + + for x in _cx..width as usize { + let src_slice = unsafe { src_ptr.add(x * 3) }; + let l_x = unsafe { src_slice.read_unaligned() }; + let l_y = unsafe { src_slice.add(1).read_unaligned() }; + let l_z = unsafe { src_slice.add(2).read_unaligned() }; + let rgb = match source { + XyzTarget::Lab => { + let lab = Lab::new(l_x, l_y, l_z); + lab.to_rgb() + } + XyzTarget::Xyz => { + let xyz = Xyz::new(l_x, l_y, l_z); + xyz.to_rgb(matrix, transfer_function) + } + XyzTarget::Luv => { + let luv = Luv::new(l_x, l_y, l_z); + luv.to_rgb() + } + XyzTarget::Lch => { + let lch = LCh::new(l_x, l_y, l_z); + lch.to_rgb() + } + }; + + let dst = unsafe { dst_ptr.add(x * channels) }; + unsafe { - dst.add(image_configuration.get_a_channel_offset()) - .write_unaligned(a_value as u8); + dst.add(image_configuration.get_r_channel_offset()) + .write_unaligned(rgb.r); + dst.add(image_configuration.get_g_channel_offset()) + .write_unaligned(rgb.g); + dst.add(image_configuration.get_b_channel_offset()) + .write_unaligned(rgb.b); + } + if image_configuration.has_alpha() { + let a_ptr = + unsafe { (a_channel.as_ptr() as *const u8).add(a_offset) as *const f32 }; + let a_f = unsafe { a_ptr.add(x).read_unaligned() }; + let a_value = (a_f * 255f32).max(0f32); + unsafe { + dst.add(image_configuration.get_a_channel_offset()) + .write_unaligned(a_value as u8); + } } } - } - src_offset += src_stride as 
usize; - dst_offset += dst_stride as usize; - a_offset += a_stride as usize; + src_offset += src_stride as usize; + dst_offset += dst_stride as usize; + a_offset += a_stride as usize; + } } } diff --git a/src/xyza_laba_to_image.rs b/src/xyza_laba_to_image.rs index 42a3a27..9032b3e 100644 --- a/src/xyza_laba_to_image.rs +++ b/src/xyza_laba_to_image.rs @@ -15,6 +15,12 @@ use crate::neon::neon_xyza_to_image; use crate::sse::sse_xyza_to_image; use crate::xyz_target::XyzTarget; use crate::{LCh, Lab, Luv, Xyz, XYZ_TO_SRGB_D65}; +#[cfg(feature = "rayon")] +use rayon::iter::{IndexedParallelIterator, ParallelIterator}; +#[cfg(feature = "rayon")] +use rayon::prelude::{ParallelSlice, ParallelSliceMut}; +#[cfg(feature = "rayon")] +use std::slice; #[allow(clippy::type_complexity)] fn xyz_with_alpha_to_channels( @@ -61,73 +67,146 @@ fn xyz_with_alpha_to_channels); } - let mut src_offset = 0usize; - let mut dst_offset = 0usize; + #[cfg(feature = "rayon")] + { + let src_slice_safe_align = unsafe { + slice::from_raw_parts( + src.as_ptr() as *const u8, + src_stride as usize * height as usize, + ) + }; + dst.par_chunks_exact_mut(dst_stride as usize) + .zip(src_slice_safe_align.par_chunks_exact(src_stride as usize)) + .for_each(|(dst, src)| unsafe { + let channels = image_configuration.get_channels_count(); - let channels = image_configuration.get_channels_count(); + let mut _cx = 0usize; - for _ in 0..height as usize { - let mut _cx = 0usize; + if let Some(dispatcher) = _wide_row_handler { + _cx = dispatcher( + _cx, + src.as_ptr() as *const f32, + 0, + dst.as_mut_ptr(), + 0, + width, + matrix, + transfer_function, + ) + } - if let Some(dispatcher) = _wide_row_handler { - unsafe { - _cx = dispatcher( - _cx, - src.as_ptr(), - src_offset, - dst.as_mut_ptr(), - dst_offset, - width, - matrix, - transfer_function, - ) - } - } + let src_ptr = src.as_ptr() as *mut f32; + let dst_ptr = dst.as_mut_ptr(); - let src_ptr = unsafe { (src.as_ptr() as *const u8).add(src_offset) as *mut f32 }; - let dst_ptr = unsafe { dst.as_mut_ptr().add(dst_offset) }; + for x in _cx..width as usize { + let px = x * 4; + let l_x = src_ptr.add(px).read_unaligned(); + let l_y = src_ptr.add(px + 1).read_unaligned(); + let l_z = src_ptr.add(px + 2).read_unaligned(); + let rgb = match source { + XyzTarget::Lab => { + let lab = Lab::new(l_x, l_y, l_z); + lab.to_rgb() + } + XyzTarget::Xyz => { + let xyz = Xyz::new(l_x, l_y, l_z); + xyz.to_rgb(matrix, transfer_function) + } + XyzTarget::Luv => { + let luv = Luv::new(l_x, l_y, l_z); + luv.to_rgb() + } + XyzTarget::Lch => { + let lch = LCh::new(l_x, l_y, l_z); + lch.to_rgb() + } + }; - for x in _cx..width as usize { - let px = x * 4; - let l_x = unsafe { src_ptr.add(px).read_unaligned() }; - let l_y = unsafe { src_ptr.add(px + 1).read_unaligned() }; - let l_z = unsafe { src_ptr.add(px + 2).read_unaligned() }; - let rgb = match source { - XyzTarget::Lab => { - let lab = Lab::new(l_x, l_y, l_z); - lab.to_rgb() - } - XyzTarget::Xyz => { - let xyz = Xyz::new(l_x, l_y, l_z); - xyz.to_rgb(matrix, transfer_function) + let l_a = src_ptr.add(px + 3).read_unaligned(); + let a_value = (l_a * 255f32).max(0f32); + let dst = dst_ptr.add(x * channels); + dst.add(image_configuration.get_r_channel_offset()) + .write_unaligned(rgb.r); + dst.add(image_configuration.get_g_channel_offset()) + .write_unaligned(rgb.g); + dst.add(image_configuration.get_b_channel_offset()) + .write_unaligned(rgb.b); + dst.add(image_configuration.get_a_channel_offset()) + .write_unaligned(a_value as u8); } - XyzTarget::Luv => { - let luv = 
Luv::new(l_x, l_y, l_z); - luv.to_rgb() - } - XyzTarget::Lch => { - let lch = LCh::new(l_x, l_y, l_z); - lch.to_rgb() + }); + } + + #[cfg(not(feature = "rayon"))] + { + let mut src_offset = 0usize; + let mut dst_offset = 0usize; + + let channels = image_configuration.get_channels_count(); + + for _ in 0..height as usize { + let mut _cx = 0usize; + + if let Some(dispatcher) = _wide_row_handler { + unsafe { + _cx = dispatcher( + _cx, + src.as_ptr(), + src_offset, + dst.as_mut_ptr(), + dst_offset, + width, + matrix, + transfer_function, + ) } - }; + } + + let src_ptr = unsafe { (src.as_ptr() as *const u8).add(src_offset) as *mut f32 }; + let dst_ptr = unsafe { dst.as_mut_ptr().add(dst_offset) }; - let l_a = unsafe { src_ptr.add(px + 3).read_unaligned() }; - let a_value = (l_a * 255f32).max(0f32); - unsafe { - let dst = dst_ptr.add(x * channels); - dst.add(image_configuration.get_r_channel_offset()) - .write_unaligned(rgb.r); - dst.add(image_configuration.get_g_channel_offset()) - .write_unaligned(rgb.g); - dst.add(image_configuration.get_b_channel_offset()) - .write_unaligned(rgb.b); - dst.add(image_configuration.get_a_channel_offset()) - .write_unaligned(a_value as u8); + for x in _cx..width as usize { + let px = x * 4; + let l_x = unsafe { src_ptr.add(px).read_unaligned() }; + let l_y = unsafe { src_ptr.add(px + 1).read_unaligned() }; + let l_z = unsafe { src_ptr.add(px + 2).read_unaligned() }; + let rgb = match source { + XyzTarget::Lab => { + let lab = Lab::new(l_x, l_y, l_z); + lab.to_rgb() + } + XyzTarget::Xyz => { + let xyz = Xyz::new(l_x, l_y, l_z); + xyz.to_rgb(matrix, transfer_function) + } + XyzTarget::Luv => { + let luv = Luv::new(l_x, l_y, l_z); + luv.to_rgb() + } + XyzTarget::Lch => { + let lch = LCh::new(l_x, l_y, l_z); + lch.to_rgb() + } + }; + + let l_a = unsafe { src_ptr.add(px + 3).read_unaligned() }; + let a_value = (l_a * 255f32).max(0f32); + unsafe { + let dst = dst_ptr.add(x * channels); + dst.add(image_configuration.get_r_channel_offset()) + .write_unaligned(rgb.r); + dst.add(image_configuration.get_g_channel_offset()) + .write_unaligned(rgb.g); + dst.add(image_configuration.get_b_channel_offset()) + .write_unaligned(rgb.b); + dst.add(image_configuration.get_a_channel_offset()) + .write_unaligned(a_value as u8); + } } - } - src_offset += src_stride as usize; - dst_offset += dst_stride as usize; + src_offset += src_stride as usize; + dst_offset += dst_stride as usize; + } } }
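
For review context, a minimal safe-Rust sketch (not part of the diff) of the two patterns this patch introduces: the SMPTE 428 transfer pair added in src/neon/gamma_curves.rs, restated in scalar form so the constants are easy to verify, and the rayon row-parallel chunking that replaces the manual src_offset/dst_offset bookkeeping in the converters. The function names, the element-based strides, and the main driver below are illustrative assumptions, not the crate's API; the crate's real code paths are the unsafe, SIMD-dispatched ones shown above.

use rayon::prelude::*;

// Scalar mirror of neon_smpte428_to_linear / neon_smpte428_from_linear:
//   to linear:   max(V, 0)^2.6 / 0.91655527974030934
//   from linear: (0.91655527974030934 * max(L, 0))^(1/2.6)
fn smpte428_to_linear(gamma: f32) -> f32 {
    gamma.max(0.0).powf(2.6) / 0.916_555_279_740_309_34
}

fn smpte428_from_linear(linear: f32) -> f32 {
    (0.916_555_279_740_309_34 * linear.max(0.0)).powf(1.0 / 2.6)
}

// Row-parallel pattern used throughout the patch: each rayon task gets one
// disjoint, stride-sized row chunk, so no running offsets are needed.
// Strides are in elements here for brevity; the crate itself uses byte
// strides and reinterprets each row with slice::from_raw_parts.
fn linear_plane_to_gamma(src: &[f32], src_stride: usize, dst: &mut [u8], dst_stride: usize, width: usize) {
    dst.par_chunks_exact_mut(dst_stride)
        .zip(src.par_chunks_exact(src_stride))
        .for_each(|(dst_row, src_row)| {
            for (d, &s) in dst_row[..width].iter_mut().zip(&src_row[..width]) {
                // Clamp to [0, 1], gamma-encode, then quantize to u8.
                let encoded = smpte428_from_linear(s.min(1.0).max(0.0));
                *d = (encoded * 255.0).min(255.0).max(0.0) as u8;
            }
        });
}

fn main() {
    let (width, height) = (8usize, 4usize);
    let src = vec![0.18f32; width * height];
    let mut dst = vec![0u8; width * height];
    linear_plane_to_gamma(&src, width, &mut dst, width, width);
    println!("0.18 linear encodes to {}", dst[0]);
}

Handing each row out as a non-overlapping chunk is what lets the parallel branches drop the src_offset/dst_offset accumulation entirely: par_chunks_exact_mut splits the destination borrow per row, and the per-row SIMD dispatcher is simply called with offset 0.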